Intro

Calculate response (feeding ration, total weight of prey groups) and predictor variables for diet data, aggregate to get 1 stomach = 1 row

# Load libraries, install if needed
library(tidyverse); theme_set(theme_light(base_size = 10))
library(readxl)
library(tidylog)
library(RCurl)
library(viridis)
library(RColorBrewer)
library(patchwork)
library(janitor)
library(icesDatras)
library(mapdata)
library(patchwork)
library(rgdal)
library(raster)
library(sf)
library(rgeos)
library(chron)
library(lattice)
library(ncdf4)
library(marmap)
library(rnaturalearth)
library(rnaturalearthdata)
library(mapplots)
library(EnvStats)
library(qwraps2)
library(forcats)
#remotes::install_github("pbs-assess/sdmTMB")
library(sdmTMB)# To load entire cache in interactive r session, do: qwraps2::lazyload_cache_dir(path = "R/analysis/spatial_trend_models_cache/html")

For maps

# Specify map ranges
ymin = 55; ymax = 58; xmin = 14; xmax = 20

map_data <- rnaturalearth::ne_countries(
  scale = "medium",
  returnclass = "sf", continent = "europe")

# Crop the polygon for plotting and efficiency:
# st_bbox(map_data) # find the rough coordinates
swe_coast <- suppressWarnings(suppressMessages(
  st_crop(map_data,
          c(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax))))

# Transform our map into UTM 33 coordinates, which is the equal-area projection we fit in:
utm_zone33 <- 32633
swe_coast_proj <- sf::st_transform(swe_coast, crs = utm_zone33)

ggplot(swe_coast_proj) + geom_sf() 


# Define plotting theme for main plot
theme_plot <- function(base_size = 10, base_family = "") {
  theme_light(base_size = 10, base_family = "") +
    theme(
      axis.text.x = element_text(angle = 90),
      axis.text = element_text(size = 8),
      legend.text = element_text(size = 8),
      legend.title = element_text(size = 8),
      legend.position = "bottom",
      legend.key.height = unit(0.2, "cm"),
      legend.margin = margin(0, 0, 0, 0),
      legend.box.margin = margin(-5, -5, -5, -5),
      strip.text = element_text(size = 8, colour = 'black', margin = margin()),
      strip.background = element_rect(fill = "grey90")
      )
}

# Define plotting theme for facet_wrap map with years
theme_facet_map <- function(base_size = 10, base_family = "") {
  theme_light(base_size = 10, base_family = "") +
    theme(
        axis.text.x = element_text(angle = 90),
        axis.text = element_text(size = 6),
        strip.text = element_text(size = 8, colour = 'black', margin = margin()),
        strip.background = element_rect(fill = "grey90")
      )
}

# Make default base map plot
plot_map_raster <- 
ggplot(swe_coast_proj) + 
  geom_sf(size = 0.3) +
  labs(x = "Longitude", y = "Latitude") +
  theme_facet_map(base_size = 14)

# Function to convert from lat lon to UTM
LongLatToUTM <- function(x, y, zone){
  xy <- data.frame(ID = 1:length(x), X = x, Y = y)
  coordinates(xy) <- c("X", "Y")
  proj4string(xy) <- CRS("+proj=longlat +datum=WGS84")  ## for example
  res <- spTransform(xy, CRS(paste("+proj=utm +zone=",zone," ellps=WGS84",sep='')))
  return(as.data.frame(res))
}

Read data

d <- readr::read_csv("/Users/maxlindmark/Desktop/R_STUDIO_PROJECTS/bentfish/data/for_analysis/stomach/full_stomach_data_21.10.25.csv") %>% dplyr::select(-X1)
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   .default = col_double(),
#>   Sample = col_character(),
#>   Length.code = col_character(),
#>   Prey.sp.code = col_character(),
#>   Comment = col_character(),
#>   Parasites.in.stomach = col_character(),
#>   Coun. = col_character(),
#>   Ship = col_logical(),
#>   Cruise = col_character(),
#>   Predator.code = col_character(),
#>   Sex = col_character(),
#>   Q.year = col_character(),
#>   Date = col_date(format = ""),
#>   Index = col_character(),
#>   Ices.rect = col_character(),
#>   source = col_character(),
#>   Number = col_logical(),
#>   Sample.type = col_logical(),
#>   transect = col_logical(),
#>   Depthstep = col_logical(),
#>   Gonad.weight.roundfish = col_logical()
#>   # ... with 7 more columns
#> )
#> ℹ Use `spec()` for the full column specifications.

d %>% filter(Year == 1990) %>% as.data.frame()
#> filter: removed 26,469 rows (>99%), 19 rows remaining
#>    SubDiv Year Month Day N.with.food N.regurgitated N.skeletal N.empty HN
#> 1      28 1990     3  18           1             NA         NA      NA 13
#> 2      28 1990     3  18           1             NA         NA      NA 13
#> 3      28 1990     3  18           1             NA         NA      NA 13
#> 4      28 1990     3  18           1             NA         NA      NA 13
#> 5      28 1990     3  18           1             NA         NA      NA 13
#> 6      28 1990     3  18           1             NA         NA      NA 13
#> 7      28 1990     3  18           1             NA         NA      NA 13
#> 8      26 1990     3  22           1             NA         NA      NA 19
#> 9      26 1990     3  22           1             NA         NA      NA 19
#> 10     26 1990     3  22           1             NA         NA      NA 19
#> 11     26 1990     3  22           1             NA         NA      NA 19
#> 12     26 1990     3  22           1             NA         NA      NA 19
#> 13     26 1990     3  22           1             NA         NA      NA 19
#> 14     26 1990     3  22           1             NA         NA      NA 19
#> 15     26 1990     3  22           1             NA         NA      NA 19
#> 16     26 1990     3  22           1             NA         NA      NA 19
#> 17     26 1990     3  22           1             NA         NA      NA 19
#> 18     26 1990     3  22           1             NA         NA      NA 19
#> 19     26 1990     3  22           1             NA         NA      NA 19
#>                                  Sample Length.code         Prey.sp.code
#> 1   1990_3_18_13_BPE_43G9_2_NA_560_NA_1        <NA>    Sprattus sprattus
#> 2  1990_3_18_13_BPE_43G9_20_NA_720_NA_1        <NA>               Pisces
#> 3  1990_3_18_13_BPE_43G9_20_NA_720_NA_1        <NA>    Sprattus sprattus
#> 4  1990_3_18_13_BPE_43G9_27_NA_560_NA_1        <NA>    Sprattus sprattus
#> 5  1990_3_18_13_BPE_43G9_39_NA_580_NA_1        <NA>    Sprattus sprattus
#> 6  1990_3_18_13_BPE_43G9_41_NA_490_NA_1        <NA>    Sprattus sprattus
#> 7  1990_3_18_13_BPE_43G9_45_NA_490_NA_1        <NA>    Sprattus sprattus
#> 8   1990_3_22_19_BPE_40G8_1_NA_960_NA_1        <NA>   Platichthys flesus
#> 9   1990_3_22_19_BPE_40G8_1_NA_960_NA_1        <NA>    Sprattus sprattus
#> 10 1990_3_22_19_BPE_40G8_11_NA_660_NA_1        <NA>    Sprattus sprattus
#> 11 1990_3_22_19_BPE_40G8_12_NA_630_NA_1        <NA> Enchelyopus cimbrius
#> 12  1990_3_22_19_BPE_40G8_2_NA_570_NA_1        <NA>    Sprattus sprattus
#> 13 1990_3_22_19_BPE_40G8_28_NA_610_NA_1        <NA>      Clupea harengus
#> 14 1990_3_22_19_BPE_40G8_35_NA_450_NA_1        <NA>    Sprattus sprattus
#> 15 1990_3_22_19_BPE_40G8_39_NA_550_NA_1        <NA>    Sprattus sprattus
#> 16 1990_3_22_19_BPE_40G8_41_NA_630_NA_1        <NA>    Sprattus sprattus
#> 17 1990_3_22_19_BPE_40G8_52_NA_440_NA_1        <NA>    Sprattus sprattus
#> 18 1990_3_22_19_BPE_40G8_53_NA_560_NA_1        <NA>    Sprattus sprattus
#> 19  1990_3_22_19_BPE_40G8_8_NA_690_NA_1        <NA>    Sprattus sprattus
#>    Prey.size Prey.weight Prey.nr Stage.digestion Comment Parasites.in.stomach
#> 1         NA        2.18      NA              NA    <NA>                 <NA>
#> 2         NA       30.69      NA              NA    <NA>                 <NA>
#> 3         NA       12.00      NA              NA    <NA>                 <NA>
#> 4         NA       70.51      NA              NA    <NA>                 <NA>
#> 5         NA       15.55      NA              NA    <NA>                 <NA>
#> 6         NA       25.60      NA              NA    <NA>                 <NA>
#> 7         NA       13.93      NA              NA    <NA>                 <NA>
#> 8         NA       21.69      NA              NA    <NA>                 <NA>
#> 9         NA       86.79      NA              NA    <NA>                 <NA>
#> 10        NA       41.96      NA              NA    <NA>                 <NA>
#> 11        NA       30.33      NA              NA    <NA>                 <NA>
#> 12        NA       85.47      NA              NA    <NA>                 <NA>
#> 13        NA       25.17      NA              NA    <NA>                 <NA>
#> 14        NA       14.82      NA              NA    <NA>                 <NA>
#> 15        NA       54.79      NA              NA    <NA>                 <NA>
#> 16        NA        8.67      NA              NA    <NA>                 <NA>
#> 17        NA        9.25      NA              NA    <NA>                 <NA>
#> 18        NA       14.01      NA              NA    <NA>                 <NA>
#> 19        NA       22.23      NA              NA    <NA>                 <NA>
#>    Quarter Coun. Ship  Cruise Pred.size.mm Pred.weight.g Predator.gutted.weight
#> 1        1   LAT   NA No name          560            NA                     NA
#> 2        1   LAT   NA No name          720            NA                     NA
#> 3        1   LAT   NA No name          720            NA                     NA
#> 4        1   LAT   NA No name          560            NA                     NA
#> 5        1   LAT   NA No name          580            NA                     NA
#> 6        1   LAT   NA No name          490            NA                     NA
#> 7        1   LAT   NA No name          490            NA                     NA
#> 8        1   LAT   NA No name          960            NA                     NA
#> 9        1   LAT   NA No name          960            NA                     NA
#> 10       1   LAT   NA No name          660            NA                     NA
#> 11       1   LAT   NA No name          630            NA                     NA
#> 12       1   LAT   NA No name          570            NA                     NA
#> 13       1   LAT   NA No name          610            NA                     NA
#> 14       1   LAT   NA No name          450            NA                     NA
#> 15       1   LAT   NA No name          550            NA                     NA
#> 16       1   LAT   NA No name          630            NA                     NA
#> 17       1   LAT   NA No name          440            NA                     NA
#> 18       1   LAT   NA No name          560            NA                     NA
#> 19       1   LAT   NA No name          690            NA                     NA
#>    Predator.code  Sex Maturity Gall.content Q.year Age Date Index   Lat Long
#> 1            COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 57.25 19.5
#> 2            COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 57.25 19.5
#> 3            COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 57.25 19.5
#> 4            COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 57.25 19.5
#> 5            COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 57.25 19.5
#> 6            COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 57.25 19.5
#> 7            COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 57.25 19.5
#> 8            COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 55.75 18.5
#> 9            COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 55.75 18.5
#> 10           COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 55.75 18.5
#> 11           COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 55.75 18.5
#> 12           COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 55.75 18.5
#> 13           COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 55.75 18.5
#> 14           COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 55.75 18.5
#> 15           COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 55.75 18.5
#> 16           COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 55.75 18.5
#> 17           COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 55.75 18.5
#> 18           COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 55.75 18.5
#> 19           COD <NA>       NA           NA 1_1990  NA <NA>  <NA> 55.75 18.5
#>    Depth.catch Ices.rect      source Number Sample.type transect Depthstep
#> 1           NA      43G9 Kevin's MSC     NA          NA       NA        NA
#> 2           NA      43G9 Kevin's MSC     NA          NA       NA        NA
#> 3           NA      43G9 Kevin's MSC     NA          NA       NA        NA
#> 4           NA      43G9 Kevin's MSC     NA          NA       NA        NA
#> 5           NA      43G9 Kevin's MSC     NA          NA       NA        NA
#> 6           NA      43G9 Kevin's MSC     NA          NA       NA        NA
#> 7           NA      43G9 Kevin's MSC     NA          NA       NA        NA
#> 8           NA      40G8 Kevin's MSC     NA          NA       NA        NA
#> 9           NA      40G8 Kevin's MSC     NA          NA       NA        NA
#> 10          NA      40G8 Kevin's MSC     NA          NA       NA        NA
#> 11          NA      40G8 Kevin's MSC     NA          NA       NA        NA
#> 12          NA      40G8 Kevin's MSC     NA          NA       NA        NA
#> 13          NA      40G8 Kevin's MSC     NA          NA       NA        NA
#> 14          NA      40G8 Kevin's MSC     NA          NA       NA        NA
#> 15          NA      40G8 Kevin's MSC     NA          NA       NA        NA
#> 16          NA      40G8 Kevin's MSC     NA          NA       NA        NA
#> 17          NA      40G8 Kevin's MSC     NA          NA       NA        NA
#> 18          NA      40G8 Kevin's MSC     NA          NA       NA        NA
#> 19          NA      40G8 Kevin's MSC     NA          NA       NA        NA
#>    Gonad.weight.roundfish Liver.weight.roundfish Stomach.content
#> 1                      NA                     NA              NA
#> 2                      NA                     NA              NA
#> 3                      NA                     NA              NA
#> 4                      NA                     NA              NA
#> 5                      NA                     NA              NA
#> 6                      NA                     NA              NA
#> 7                      NA                     NA              NA
#> 8                      NA                     NA              NA
#> 9                      NA                     NA              NA
#> 10                     NA                     NA              NA
#> 11                     NA                     NA              NA
#> 12                     NA                     NA              NA
#> 13                     NA                     NA              NA
#> 14                     NA                     NA              NA
#> 15                     NA                     NA              NA
#> 16                     NA                     NA              NA
#> 17                     NA                     NA              NA
#> 18                     NA                     NA              NA
#> 19                     NA                     NA              NA
#>    Perc.stomac.content comments Processed
#> 1                   NA       NA        NA
#> 2                   NA       NA        NA
#> 3                   NA       NA        NA
#> 4                   NA       NA        NA
#> 5                   NA       NA        NA
#> 6                   NA       NA        NA
#> 7                   NA       NA        NA
#> 8                   NA       NA        NA
#> 9                   NA       NA        NA
#> 10                  NA       NA        NA
#> 11                  NA       NA        NA
#> 12                  NA       NA        NA
#> 13                  NA       NA        NA
#> 14                  NA       NA        NA
#> 15                  NA       NA        NA
#> 16                  NA       NA        NA
#> 17                  NA       NA        NA
#> 18                  NA       NA        NA
#> 19                  NA       NA        NA
#>                                        Unique.pred.id
#> 1   1990_1_13_1990_3_18_13_BPE_43G9_2_NA_560_NA_1_COD
#> 2  1990_1_13_1990_3_18_13_BPE_43G9_20_NA_720_NA_1_COD
#> 3  1990_1_13_1990_3_18_13_BPE_43G9_20_NA_720_NA_1_COD
#> 4  1990_1_13_1990_3_18_13_BPE_43G9_27_NA_560_NA_1_COD
#> 5  1990_1_13_1990_3_18_13_BPE_43G9_39_NA_580_NA_1_COD
#> 6  1990_1_13_1990_3_18_13_BPE_43G9_41_NA_490_NA_1_COD
#> 7  1990_1_13_1990_3_18_13_BPE_43G9_45_NA_490_NA_1_COD
#> 8   1990_1_19_1990_3_22_19_BPE_40G8_1_NA_960_NA_1_COD
#> 9   1990_1_19_1990_3_22_19_BPE_40G8_1_NA_960_NA_1_COD
#> 10 1990_1_19_1990_3_22_19_BPE_40G8_11_NA_660_NA_1_COD
#> 11 1990_1_19_1990_3_22_19_BPE_40G8_12_NA_630_NA_1_COD
#> 12  1990_1_19_1990_3_22_19_BPE_40G8_2_NA_570_NA_1_COD
#> 13 1990_1_19_1990_3_22_19_BPE_40G8_28_NA_610_NA_1_COD
#> 14 1990_1_19_1990_3_22_19_BPE_40G8_35_NA_450_NA_1_COD
#> 15 1990_1_19_1990_3_22_19_BPE_40G8_39_NA_550_NA_1_COD
#> 16 1990_1_19_1990_3_22_19_BPE_40G8_41_NA_630_NA_1_COD
#> 17 1990_1_19_1990_3_22_19_BPE_40G8_52_NA_440_NA_1_COD
#> 18 1990_1_19_1990_3_22_19_BPE_40G8_53_NA_560_NA_1_COD
#> 19  1990_1_19_1990_3_22_19_BPE_40G8_8_NA_690_NA_1_COD
#>    coord_from_ices_rect_centre
#> 1                            Y
#> 2                            Y
#> 3                            Y
#> 4                            Y
#> 5                            Y
#> 6                            Y
#> 7                            Y
#> 8                            Y
#> 9                            Y
#> 10                           Y
#> 11                           Y
#> 12                           Y
#> 13                           Y
#> 14                           Y
#> 15                           Y
#> 16                           Y
#> 17                           Y
#> 18                           Y
#> 19                           Y

# Drop NA coordinates (because these do not even have ices-rectangle, see diet data processing script in benthfish project)
d <- d %>% drop_na(Long) %>% drop_na(Lat)
#> drop_na: removed 853 rows (3%), 25,635 rows remaining
#> drop_na: no rows removed

# Add UTM coordinates (first rename )
utm_coords <- LongLatToUTM(d$Long, d$Lat, zone = 33)
d$X <- utm_coords$X/1000
d$Y <- utm_coords$Y/1000

Plot data

head(data.frame(d))
#>   SubDiv Year Month Day N.with.food N.regurgitated N.skeletal N.empty HN Sample
#> 1     28 2017    11  27           1             NA         NA      NA 52    507
#> 2     28 2017    11  27           1             NA         NA      NA 52    507
#> 3     28 2017    11  27           1             NA         NA      NA 52    507
#> 4     28 2017    11  27           1             NA         NA      NA 52    507
#> 5     28 2017    11  27           1             NA         NA      NA 52    507
#> 6     28 2017    11  27           1             NA         NA      NA 52    507
#>   Length.code    Prey.sp.code Prey.size Prey.weight Prey.nr Stage.digestion
#> 1        <NA> Saduria entomon        20        0.21       1               1
#> 2        <NA> Saduria entomon        16        0.10       1               1
#> 3        <NA> Saduria entomon        20        0.15       1               1
#> 4        <NA> Saduria entomon        20        0.17       1               1
#> 5        <NA> Saduria entomon        NA        0.27       2               1
#> 6        <NA> Macoma balthica        NA        0.19       1               1
#>   Comment Parasites.in.stomach Quarter Coun. Ship Cruise Pred.size.mm
#> 1    <NA>                 <NA>       4   SWE   NA   BITS          200
#> 2    <NA>                 <NA>       4   SWE   NA   BITS          200
#> 3    <NA>                 <NA>       4   SWE   NA   BITS          200
#> 4    <NA>                 <NA>       4   SWE   NA   BITS          200
#> 5    <NA>                 <NA>       4   SWE   NA   BITS          200
#> 6    <NA>                 <NA>       4   SWE   NA   BITS          200
#>   Pred.weight.g Predator.gutted.weight Predator.code Sex Maturity Gall.content
#> 1           100                     92           FLE   M        4           NA
#> 2           100                     92           FLE   M        4           NA
#> 3           100                     92           FLE   M        4           NA
#> 4           100                     92           FLE   M        4           NA
#> 5           100                     92           FLE   M        4           NA
#> 6           100                     92           FLE   M        4           NA
#>   Q.year Age       Date        Index      Lat     Long Depth.catch Ices.rect
#> 1 4_2017   3 2017-11-27 OXBH.2017.52 57.46667 19.23333          67      43G9
#> 2 4_2017   3 2017-11-27 OXBH.2017.52 57.46667 19.23333          67      43G9
#> 3 4_2017   3 2017-11-27 OXBH.2017.52 57.46667 19.23333          67      43G9
#> 4 4_2017   3 2017-11-27 OXBH.2017.52 57.46667 19.23333          67      43G9
#> 5 4_2017   3 2017-11-27 OXBH.2017.52 57.46667 19.23333          67      43G9
#> 6 4_2017   3 2017-11-27 OXBH.2017.52 57.46667 19.23333          67      43G9
#>        source Number Sample.type transect Depthstep Gonad.weight.roundfish
#> 1 Max Postdoc     NA          NA       NA        NA                     NA
#> 2 Max Postdoc     NA          NA       NA        NA                     NA
#> 3 Max Postdoc     NA          NA       NA        NA                     NA
#> 4 Max Postdoc     NA          NA       NA        NA                     NA
#> 5 Max Postdoc     NA          NA       NA        NA                     NA
#> 6 Max Postdoc     NA          NA       NA        NA                     NA
#>   Liver.weight.roundfish Stomach.content Perc.stomac.content comments Processed
#> 1                     NA              NA                  NA       NA        NA
#> 2                     NA              NA                  NA       NA        NA
#> 3                     NA              NA                  NA       NA        NA
#> 4                     NA              NA                  NA       NA        NA
#> 5                     NA              NA                  NA       NA        NA
#> 6                     NA              NA                  NA       NA        NA
#>      Unique.pred.id coord_from_ices_rect_centre       X        Y
#> 1 2017_4_52_507_FLE                           N 753.841 6377.247
#> 2 2017_4_52_507_FLE                           N 753.841 6377.247
#> 3 2017_4_52_507_FLE                           N 753.841 6377.247
#> 4 2017_4_52_507_FLE                           N 753.841 6377.247
#> 5 2017_4_52_507_FLE                           N 753.841 6377.247
#> 6 2017_4_52_507_FLE                           N 753.841 6377.247
sort(colnames(d))
#>  [1] "Age"                         "Comment"                    
#>  [3] "comments"                    "coord_from_ices_rect_centre"
#>  [5] "Coun."                       "Cruise"                     
#>  [7] "Date"                        "Day"                        
#>  [9] "Depth.catch"                 "Depthstep"                  
#> [11] "Gall.content"                "Gonad.weight.roundfish"     
#> [13] "HN"                          "Ices.rect"                  
#> [15] "Index"                       "Lat"                        
#> [17] "Length.code"                 "Liver.weight.roundfish"     
#> [19] "Long"                        "Maturity"                   
#> [21] "Month"                       "N.empty"                    
#> [23] "N.regurgitated"              "N.skeletal"                 
#> [25] "N.with.food"                 "Number"                     
#> [27] "Parasites.in.stomach"        "Perc.stomac.content"        
#> [29] "Pred.size.mm"                "Pred.weight.g"              
#> [31] "Predator.code"               "Predator.gutted.weight"     
#> [33] "Prey.nr"                     "Prey.size"                  
#> [35] "Prey.sp.code"                "Prey.weight"                
#> [37] "Processed"                   "Q.year"                     
#> [39] "Quarter"                     "Sample"                     
#> [41] "Sample.type"                 "Sex"                        
#> [43] "Ship"                        "source"                     
#> [45] "Stage.digestion"             "Stomach.content"            
#> [47] "SubDiv"                      "transect"                   
#> [49] "Unique.pred.id"              "X"                          
#> [51] "Y"                           "Year"
#  [1] "Age"                    "Comment"                "comments"               "Coun."                 
#  [5] "Cruise"                 "Date"                   "Day"                    "Depth.catch"           
#  [9] "Depthstep"              "Gall.content"           "Gonad.weight.roundfish" "HN"                    
# [13] "Ices.rect"              "Index"                  "Lat"                    "Length.code"           
# [17] "Liver.weight.roundfish" "Long"                   "Maturity"               "Month"                 
# [21] "N.empty"                "N.regurgitated"         "N.skeletal"             "N.with.food"           
# [25] "Number"                 "Parasites.in.stomach"   "Perc.stomac.content"    "Pred.size.mm"          
# [29] "Pred.weight.g"          "Predator.code"          "Predator.gutted.weight" "Prey.nr"               
# [33] "Prey.size"              "Prey.sp.code"           "Prey.weight"            "Processed"             
# [37] "Q.year"                 "Quarter"                "Sample"                 "Sample.type"           
# [41] "Sex"                    "Ship"                   "source"                 "Stage.digestion"       
# [45] "Stomach.content"        "SubDiv"                 "transect"               "Unique.pred.id"        
# [49] "X"

# Plot data in space, color by survey
plot_map_raster +
  geom_point(data = d, aes(x = X * 1000, y = Y * 1000, color = Cruise), size = 0.5) +
  scale_color_brewer(palette = "Set2") + 
  facet_wrap(~ Cruise, ncol = 3) + 
  theme_plot()

Summarize and organize data

# Calculate total weight of prey by predator ID and prey species (i.e., across prey sizes)
# Create wide data frame so that I can sum easily across prey groups (columns)
# I.e., one row = one stomach from here!
d_wide <- d %>% 
  drop_na(Prey.weight) %>% 
  group_by(Unique.pred.id, Prey.sp.code) %>% 
  summarise(tot_biom_per_prey = sum(Prey.weight)) %>% 
  ungroup() %>% 
  pivot_wider(names_from = Prey.sp.code, values_from = tot_biom_per_prey) %>% 
  mutate(across(everything(), ~replace_na(.x, 0))) %>% # Replace NA with 0, because it means the prey does not exist
  janitor::clean_names()
#> drop_na: removed 1,216 rows (5%), 24,419 rows remaining
#> group_by: 2 grouping variables (Unique.pred.id, Prey.sp.code)
#> summarise: now 20,671 rows and 3 columns, one group variable remaining (Unique.pred.id)
#> ungroup: no grouping variables
#> pivot_wider: reorganized (Prey.sp.code, tot_biom_per_prey) into (Saduria entomon, Bylgides sarsi, Pontoporeia femorata, Clupea harengus, Mysis mixta, …) [was 20671x3, now 11189x117]
#> mutate: changed 8,723 values (78%) of 'Saduria entomon' (8723 fewer NA)
#>         changed 8,675 values (78%) of 'Bylgides sarsi' (8675 fewer NA)
#>         changed 10,425 values (93%) of 'Pontoporeia femorata' (10425 fewer NA)
#>         changed 10,271 values (92%) of 'Clupea harengus' (10271 fewer NA)
#>         changed 9,088 values (81%) of 'Mysis mixta' (9088 fewer NA)
#>         changed 10,702 values (96%) of 'Gammarus sp.' (10702 fewer NA)
#>         changed 9,454 values (84%) of 'Sprattus sprattus' (9454 fewer NA)
#>         changed 11,151 values (>99%) of 'Enchelyopus cimbrius' (11151 fewer NA)
#>         changed 10,857 values (97%) of 'Monoporeia affinis' (10857 fewer NA)
#>         changed 11,184 values (>99%) of 'Mysis relicta' (11184 fewer NA)
#>         changed 11,175 values (>99%) of 'Pontoporeia sp.' (11175 fewer NA)
#>         changed 11,028 values (99%) of 'Gadus morhua' (11028 fewer NA)
#>         changed 11,186 values (>99%) of 'Pisces Eggs' (11186 fewer NA)
#>         changed 11,165 values (>99%) of 'Hyperia galba' (11165 fewer NA)
#>         changed 10,336 values (92%) of 'Diastylis rathkei' (10336 fewer NA)
#>         changed 10,486 values (94%) of 'Halicryptus spinulosus' (10486 fewer NA)
#>         changed 10,184 values (91%) of 'Pisces' (10184 fewer NA)
#>         changed 11,187 values (>99%) of 'Pomatoschistus minutus' (11187 fewer NA)
#>         changed 10,206 values (91%) of 'Limecola balthica' (10206 fewer NA)
#>         changed 11,186 values (>99%) of 'Mollusca' (11186 fewer NA)
#>         changed 11,173 values (>99%) of 'Platichthys flesus' (11173 fewer NA)
#>         changed 10,860 values (97%) of 'Gasterosteus aculeatus' (10860 fewer NA)
#>         changed 10,875 values (97%) of 'Mysidae' (10875 fewer NA)
#>         changed 10,985 values (98%) of 'Neomysis integer' (10985 fewer NA)
#>         changed 10,723 values (96%) of 'Cumacea' (10723 fewer NA)
#>         changed 11,185 values (>99%) of 'Corophium volutator' (11185 fewer NA)
#>         changed 11,059 values (99%) of 'Stone' (11059 fewer NA)
#>         changed 10,803 values (97%) of 'Clupeidae' (10803 fewer NA)
#>         changed 10,777 values (96%) of 'Gobiidae' (10777 fewer NA)
#>         changed 10,379 values (93%) of 'NA' (10379 fewer NA)
#>         changed 11,143 values (>99%) of 'Scoloplos armiger' (11143 fewer NA)
#>         changed 11,174 values (>99%) of 'plastic' (11174 fewer NA)
#>         changed 11,094 values (99%) of 'Priapulus caudatus' (11094 fewer NA)
#>         changed 11,129 values (99%) of 'Bivalvia' (11129 fewer NA)
#>         changed 10,966 values (98%) of 'Mytilus sp.' (10966 fewer NA)
#>         changed 10,892 values (97%) of 'Crangon crangon' (10892 fewer NA)
#>         changed 11,140 values (>99%) of 'Idotea balthica' (11140 fewer NA)
#>         changed 11,187 values (>99%) of 'Trachurus trachurus' (11187 fewer NA)
#>         changed 11,187 values (>99%) of 'Annelida' (11187 fewer NA)
#>         changed 11,095 values (99%) of 'Algae' (11095 fewer NA)
#>         changed 11,185 values (>99%) of 'Priapulidae' (11185 fewer NA)
#>         changed 11,182 values (>99%) of 'Waste' (11182 fewer NA)
#>         changed 11,155 values (>99%) of 'Praunus flexuosus' (11155 fewer NA)
#>         changed 11,178 values (>99%) of 'Unidentified mass' (11178 fewer NA)
#>         changed 11,186 values (>99%) of 'Merlangius merlangus' (11186 fewer NA)
#>         changed 11,122 values (99%) of 'Scales' (11122 fewer NA)
#>         changed 11,186 values (>99%) of 'Gadidae' (11186 fewer NA)
#>         changed 11,133 values (99%) of 'Crustacea' (11133 fewer NA)
#>         changed 11,085 values (99%) of 'Amphipoda' (11085 fewer NA)
#>         changed 11,135 values (>99%) of 'Spine' (11135 fewer NA)
#>         changed 11,187 values (>99%) of 'Pleuronectes platessa' (11187 fewer NA)
#>         changed 11,188 values (>99%) of 'Anguilla anguilla' (11188 fewer NA)
#>         changed 11,188 values (>99%) of 'Empty' (11188 fewer NA)
#>         changed 11,186 values (>99%) of 'Carbon' (11186 fewer NA)
#>         changed 11,157 values (>99%) of 'Copepoda' (11157 fewer NA)
#>         changed 11,180 values (>99%) of 'Zoarces viviparus' (11180 fewer NA)
#>         changed 11,185 values (>99%) of 'Spinachia spinachia' (11185 fewer NA)
#>         changed 11,186 values (>99%) of 'Pungitius pungitius' (11186 fewer NA)
#>         changed 11,188 values (>99%) of 'Terebellides stroemii' (11188 fewer NA)
#>         changed 11,171 values (>99%) of 'Palaemon sp.' (11171 fewer NA)
#>         changed 11,183 values (>99%) of 'Ammodytes tobianus' (11183 fewer NA)
#>         changed 11,187 values (>99%) of 'Cottidae' (11187 fewer NA)
#>         changed 11,181 values (>99%) of 'Hediste diversicolor' (11181 fewer NA)
#>         changed 11,174 values (>99%) of 'Palaemon elegans' (11174 fewer NA)
#>         changed 11,164 values (>99%) of 'Ammodytidae' (11164 fewer NA)
#>         changed 11,182 values (>99%) of 'Caridea' (11182 fewer NA)
#>         changed 11,188 values (>99%) of 'Amphibalanus improvisus' (11188 fewer NA)
#>         changed 11,188 values (>99%) of 'Myoxocephalus quadricornis' (11188 fewer NA)
#>         changed 11,183 values (>99%) of 'Phyllodocida' (11183 fewer NA)
#>         changed 11,104 values (99%) of 'Remains' (11104 fewer NA)
#>         changed 11,180 values (>99%) of 'Palaemonidae' (11180 fewer NA)
#>         changed 11,176 values (>99%) of 'Carcinus maenas' (11176 fewer NA)
#>         changed 11,188 values (>99%) of 'Gastropoda' (11188 fewer NA)
#>         changed 11,178 values (>99%) of 'Sand' (11178 fewer NA)
#>         changed 11,186 values (>99%) of 'Cerastoderma glaucum' (11186 fewer NA)
#>         changed 11,180 values (>99%) of 'Hyperoplus lanceolatus' (11180 fewer NA)
#>         changed 11,163 values (>99%) of 'Mya arenaria' (11163 fewer NA)
#>         changed 11,176 values (>99%) of 'Hydrobia sp.' (11176 fewer NA)
#>         changed 11,182 values (>99%) of 'Wood' (11182 fewer NA)
#>         changed 11,186 values (>99%) of 'Pleuronectiformes' (11186 fewer NA)
#>         changed 11,188 values (>99%) of 'Scophthalmus maximus' (11188 fewer NA)
#>         changed 11,161 values (>99%) of 'Polychaeta' (11161 fewer NA)
#>         changed 11,091 values (99%) of 'Priapulida' (11091 fewer NA)
#>         changed 11,188 values (>99%) of 'Halicryptus' (11188 fewer NA)
#>         changed 11,187 values (>99%) of 'Neogobius melanostomus' (11187 fewer NA)
#>         changed 11,187 values (>99%) of 'Gobius niger' (11187 fewer NA)
#>         changed 11,188 values (>99%) of 'digestive tract' (11188 fewer NA)
#>         changed 11,188 values (>99%) of 'Pleuronectidae' (11188 fewer NA)
#>         changed 11,188 values (>99%) of 'sprattus sprattus' (11188 fewer NA)
#>         changed 11,188 values (>99%) of 'Gasterosteidae' (11188 fewer NA)
#>         changed 11,186 values (>99%) of 'litter' (11186 fewer NA)
#>         changed 11,167 values (>99%) of 'Mysida' (11167 fewer NA)
#>         changed 11,187 values (>99%) of 'Calanoida' (11187 fewer NA)
#>         changed 11,188 values (>99%) of 'Belone belone' (11188 fewer NA)
#>         changed 11,090 values (99%) of 'Pontoporeiidae' (11090 fewer NA)
#>         changed 11,185 values (>99%) of 'Mucus' (11185 fewer NA)
#>         changed 11,188 values (>99%) of 'Pectinaria sp.' (11188 fewer NA)
#>         changed 11,069 values (99%) of 'Macoma balthica' (11069 fewer NA)
#>         changed 10,904 values (97%) of 'remains' (10904 fewer NA)
#>         changed 11,136 values (>99%) of 'stone' (11136 fewer NA)
#>         changed 11,188 values (>99%) of 'carbon' (11188 fewer NA)
#>         changed 11,187 values (>99%) of 'Nephtys ciliata' (11187 fewer NA)
#>         changed 11,188 values (>99%) of 'Gastrosacus' (11188 fewer NA)
#>         changed 11,188 values (>99%) of 'wood' (11188 fewer NA)
#>         changed 11,188 values (>99%) of 'Litter/plastics' (11188 fewer NA)
#>         changed 11,188 values (>99%) of 'clupeidae' (11188 fewer NA)
#>         changed 11,176 values (>99%) of 'Pontoporeidae' (11176 fewer NA)
#>         changed 11,182 values (>99%) of 'sand' (11182 fewer NA)
#>         changed 11,187 values (>99%) of 'Decapoda' (11187 fewer NA)
#>         changed 11,188 values (>99%) of 'Agonus cataphractus' (11188 fewer NA)
#>         changed 11,188 values (>99%) of 'clupea harengus' (11188 fewer NA)
#>         changed 11,173 values (>99%) of 'Halicryptus spinolusus' (11173 fewer NA)
#>         changed 11,034 values (99%) of 'Mytilus edulis' (11034 fewer NA)
#>         changed 11,181 values (>99%) of 'priapulida' (11181 fewer NA)
#>         changed 11,188 values (>99%) of 'Prapulida' (11188 fewer NA)
#>         changed 11,188 values (>99%) of 'Myoxocephalus scorpius' (11188 fewer NA)

# Test how many zeroes I have per time period
d %>% 
  drop_na(Prey.weight) %>% 
  group_by(Year, Unique.pred.id, Prey.sp.code) %>% 
  summarise(tot_biom_per_prey = sum(Prey.weight)) %>% 
  ungroup() %>% 
  group_by(Unique.pred.id, Year) %>% 
  summarise(tot_biom_prey = sum(tot_biom_per_prey)) %>% 
  ungroup() %>% 
  mutate(empty = ifelse(tot_biom_prey == 0, "Y", "N")) %>% 
  ggplot(., aes(Year, fill = empty)) +
  geom_bar()
#> drop_na: removed 1,216 rows (5%), 24,419 rows remaining
#> group_by: 3 grouping variables (Year, Unique.pred.id, Prey.sp.code)
#> summarise: now 20,671 rows and 4 columns, 2 group variables remaining (Year, Unique.pred.id)
#> ungroup: no grouping variables
#> group_by: 2 grouping variables (Unique.pred.id, Year)
#> summarise: now 11,189 rows and 3 columns, one group variable remaining (Unique.pred.id)
#> ungroup: no grouping variables
#> mutate: new variable 'empty' (character) with 2 unique values and 0% NA

  
head(d_wide)
#> # A tibble: 6 × 117
#>   unique_pred_id saduria_entomon bylgides_sarsi pontoporeia_fem… clupea_harengus
#>   <chr>                    <dbl>          <dbl>            <dbl>           <dbl>
#> 1 1963_1_3_1963…           18.3             0                  0               0
#> 2 1963_1_3_1963…            5.62            1.2                0               0
#> 3 1963_1_3_1963…            3.09            0                  0               0
#> 4 1963_1_3_1963…           12.6             0                  0               0
#> 5 1963_1_3_1963…            2.32            0                  0               0
#> 6 1963_1_3_1963…            1.5             0                  0               0
#> # … with 112 more variables: mysis_mixta <dbl>, gammarus_sp <dbl>,
#> #   sprattus_sprattus <dbl>, enchelyopus_cimbrius <dbl>,
#> #   monoporeia_affinis <dbl>, mysis_relicta <dbl>, pontoporeia_sp <dbl>,
#> #   gadus_morhua <dbl>, pisces_eggs <dbl>, hyperia_galba <dbl>,
#> #   diastylis_rathkei <dbl>, halicryptus_spinulosus <dbl>, pisces <dbl>,
#> #   pomatoschistus_minutus <dbl>, limecola_balthica <dbl>, mollusca <dbl>,
#> #   platichthys_flesus <dbl>, gasterosteus_aculeatus <dbl>, mysidae <dbl>, …
str(d_wide)
#> tibble [11,189 × 117] (S3: tbl_df/tbl/data.frame)
#>  $ unique_pred_id            : chr [1:11189] "1963_1_3_1963_3_18_3_MAZ_41G8_22_NA_500_NA_1_COD" "1963_1_3_1963_3_18_3_MAZ_41G8_39_NA_450_NA_1_COD" "1963_1_3_1963_3_18_3_MAZ_41G8_41_NA_560_NA_1_COD" "1963_1_3_1963_3_18_3_MAZ_41G8_42_NA_390_NA_1_COD" ...
#>  $ saduria_entomon           : num [1:11189] 18.27 5.62 3.09 12.6 2.32 ...
#>  $ bylgides_sarsi            : num [1:11189] 0 1.2 0 0 0 0 0 0 0 0 ...
#>  $ pontoporeia_femorata      : num [1:11189] 0 0 0 0 0 0 3.34 0 0 0.1 ...
#>  $ clupea_harengus           : num [1:11189] 0 0 0 0 0 ...
#>  $ mysis_mixta               : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ gammarus_sp               : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ sprattus_sprattus         : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ enchelyopus_cimbrius      : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ monoporeia_affinis        : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ mysis_relicta             : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ pontoporeia_sp            : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ gadus_morhua              : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ pisces_eggs               : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ hyperia_galba             : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ diastylis_rathkei         : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ halicryptus_spinulosus    : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ pisces                    : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ pomatoschistus_minutus    : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ limecola_balthica         : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ mollusca                  : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ platichthys_flesus        : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ gasterosteus_aculeatus    : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ mysidae                   : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ neomysis_integer          : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ cumacea                   : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ corophium_volutator       : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ stone                     : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ clupeidae                 : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ gobiidae                  : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ na                        : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ scoloplos_armiger         : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ plastic                   : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ priapulus_caudatus        : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ bivalvia                  : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ mytilus_sp                : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ crangon_crangon           : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ idotea_balthica           : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ trachurus_trachurus       : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ annelida                  : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ algae                     : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ priapulidae               : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ waste                     : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ praunus_flexuosus         : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ unidentified_mass         : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ merlangius_merlangus      : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ scales                    : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ gadidae                   : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ crustacea                 : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ amphipoda                 : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ spine                     : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ pleuronectes_platessa     : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ anguilla_anguilla         : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ empty                     : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ carbon                    : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ copepoda                  : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ zoarces_viviparus         : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ spinachia_spinachia       : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ pungitius_pungitius       : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ terebellides_stroemii     : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ palaemon_sp               : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ ammodytes_tobianus        : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ cottidae                  : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ hediste_diversicolor      : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ palaemon_elegans          : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ ammodytidae               : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ caridea                   : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ amphibalanus_improvisus   : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ myoxocephalus_quadricornis: num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ phyllodocida              : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ remains                   : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ palaemonidae              : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ carcinus_maenas           : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ gastropoda                : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ sand                      : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ cerastoderma_glaucum      : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ hyperoplus_lanceolatus    : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ mya_arenaria              : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ hydrobia_sp               : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ wood                      : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ pleuronectiformes         : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ scophthalmus_maximus      : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ polychaeta                : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ priapulida                : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ halicryptus               : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ neogobius_melanostomus    : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ gobius_niger              : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ digestive_tract           : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ pleuronectidae            : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ sprattus_sprattus_2       : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ gasterosteidae            : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ litter                    : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ mysida                    : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ calanoida                 : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ belone_belone             : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ pontoporeiidae            : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ mucus                     : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ pectinaria_sp             : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ macoma_balthica           : num [1:11189] 0 0 0 0 0 0 0 0 0 0 ...
#>   [list output truncated]
colnames(d_wide)
#>   [1] "unique_pred_id"             "saduria_entomon"           
#>   [3] "bylgides_sarsi"             "pontoporeia_femorata"      
#>   [5] "clupea_harengus"            "mysis_mixta"               
#>   [7] "gammarus_sp"                "sprattus_sprattus"         
#>   [9] "enchelyopus_cimbrius"       "monoporeia_affinis"        
#>  [11] "mysis_relicta"              "pontoporeia_sp"            
#>  [13] "gadus_morhua"               "pisces_eggs"               
#>  [15] "hyperia_galba"              "diastylis_rathkei"         
#>  [17] "halicryptus_spinulosus"     "pisces"                    
#>  [19] "pomatoschistus_minutus"     "limecola_balthica"         
#>  [21] "mollusca"                   "platichthys_flesus"        
#>  [23] "gasterosteus_aculeatus"     "mysidae"                   
#>  [25] "neomysis_integer"           "cumacea"                   
#>  [27] "corophium_volutator"        "stone"                     
#>  [29] "clupeidae"                  "gobiidae"                  
#>  [31] "na"                         "scoloplos_armiger"         
#>  [33] "plastic"                    "priapulus_caudatus"        
#>  [35] "bivalvia"                   "mytilus_sp"                
#>  [37] "crangon_crangon"            "idotea_balthica"           
#>  [39] "trachurus_trachurus"        "annelida"                  
#>  [41] "algae"                      "priapulidae"               
#>  [43] "waste"                      "praunus_flexuosus"         
#>  [45] "unidentified_mass"          "merlangius_merlangus"      
#>  [47] "scales"                     "gadidae"                   
#>  [49] "crustacea"                  "amphipoda"                 
#>  [51] "spine"                      "pleuronectes_platessa"     
#>  [53] "anguilla_anguilla"          "empty"                     
#>  [55] "carbon"                     "copepoda"                  
#>  [57] "zoarces_viviparus"          "spinachia_spinachia"       
#>  [59] "pungitius_pungitius"        "terebellides_stroemii"     
#>  [61] "palaemon_sp"                "ammodytes_tobianus"        
#>  [63] "cottidae"                   "hediste_diversicolor"      
#>  [65] "palaemon_elegans"           "ammodytidae"               
#>  [67] "caridea"                    "amphibalanus_improvisus"   
#>  [69] "myoxocephalus_quadricornis" "phyllodocida"              
#>  [71] "remains"                    "palaemonidae"              
#>  [73] "carcinus_maenas"            "gastropoda"                
#>  [75] "sand"                       "cerastoderma_glaucum"      
#>  [77] "hyperoplus_lanceolatus"     "mya_arenaria"              
#>  [79] "hydrobia_sp"                "wood"                      
#>  [81] "pleuronectiformes"          "scophthalmus_maximus"      
#>  [83] "polychaeta"                 "priapulida"                
#>  [85] "halicryptus"                "neogobius_melanostomus"    
#>  [87] "gobius_niger"               "digestive_tract"           
#>  [89] "pleuronectidae"             "sprattus_sprattus_2"       
#>  [91] "gasterosteidae"             "litter"                    
#>  [93] "mysida"                     "calanoida"                 
#>  [95] "belone_belone"              "pontoporeiidae"            
#>  [97] "mucus"                      "pectinaria_sp"             
#>  [99] "macoma_balthica"            "remains_2"                 
#> [101] "stone_2"                    "carbon_2"                  
#> [103] "nephtys_ciliata"            "gastrosacus"               
#> [105] "wood_2"                     "litter_plastics"           
#> [107] "clupeidae_2"                "pontoporeidae"             
#> [109] "sand_2"                     "decapoda"                  
#> [111] "agonus_cataphractus"        "clupea_harengus_2"         
#> [113] "halicryptus_spinolusus"     "mytilus_edulis"            
#> [115] "priapulida_2"               "prapulida"                 
#> [117] "myoxocephalus_scorpius"
 
# Now make some calculations and aggregate some taxonomic level. Since all columns are assigned to 
# some higher level group (or the same group), the sum of these is the total stomach content. 
# Note that I have one group for unidentified clupeids, but also sprat and herring. So if I want the 
# total of some aggregated group, then I need to add all the sub-groups.

# CHECK all these are covered now that I also use old stomach data!

d_wide2 <- d_wide %>% 
  mutate(amphipoda_tot = hyperia_galba + gammarus_sp + monoporeia_affinis + 
           corophium_volutator + amphipoda,
         bivalvia_tot = bivalvia + mytilus_sp + cerastoderma_glaucum + mya_arenaria + macoma_balthica + 
           mytilus_edulis + limecola_balthica,
         clupeidae_tot = clupeidae + clupeidae_2,
         clupea_harengus_tot = clupea_harengus + clupea_harengus_2,
         gadus_morhua_tot = gadus_morhua,
         gadiformes_tot = gadidae + merlangius_merlangus,
         gobiidae_tot = gobiidae,
         mysidae_tot = mysis_relicta + mysidae + neomysis_integer + mysis_mixta + mysida + gastrosacus,
         non_bio_tot = stone + plastic + sand + wood + litter + carbon + stone_2 + carbon_2 + wood_2 + 
           litter_plastics + sand_2,
         other_crustacea_tot = pontoporeia_femorata + crangon_crangon + idotea_balthica + cumacea + 
           praunus_flexuosus + crustacea + diastylis_rathkei + palaemon_sp + palaemon_elegans + caridea +
           amphibalanus_improvisus + palaemonidae + carcinus_maenas + copepoda + calanoida + pontoporeiidae + decapoda, 
         other_tot = halicryptus_spinulosus + priapulus_caudatus + annelida + algae + priapulidae + waste +
           unidentified_mass + spine + empty + mollusca + na + remains + gastropoda + hydrobia_sp + 
           priapulida + halicryptus + digestive_tract + mucus + remains_2 + pontoporeidae + pontoporeia_sp + 
           halicryptus_spinolusus + priapulida_2 + prapulida,
         other_pisces_tot = pomatoschistus_minutus + pisces + pisces_eggs + enchelyopus_cimbrius + trachurus_trachurus +
           gasterosteus_aculeatus + scales + pleuronectes_platessa + anguilla_anguilla + pungitius_pungitius + ammodytes_tobianus +
           cottidae + spinachia_spinachia + zoarces_viviparus + ammodytidae + myoxocephalus_quadricornis +
           hyperoplus_lanceolatus + pleuronectiformes + scophthalmus_maximus + neogobius_melanostomus + gobius_niger +
           pleuronectidae + gasterosteidae + belone_belone + agonus_cataphractus + myoxocephalus_scorpius,
         platichthys_flesus_tot = platichthys_flesus,
         polychaeta_tot = bylgides_sarsi + scoloplos_armiger + terebellides_stroemii + hediste_diversicolor + 
           phyllodocida + polychaeta + pectinaria_sp + nephtys_ciliata,
         saduria_entomon_tot = saduria_entomon,
         sprattus_sprattus_tot = sprattus_sprattus + sprattus_sprattus_2
         )
#> mutate: new variable 'amphipoda_tot' (double) with 160 unique values and 0% NA
#>         new variable 'bivalvia_tot' (double) with 563 unique values and 0% NA
#>         new variable 'clupeidae_tot' (double) with 297 unique values and 0% NA
#>         new variable 'clupea_harengus_tot' (double) with 826 unique values and 0% NA
#>         new variable 'gadus_morhua_tot' (double) with 156 unique values and 0% NA
#>         new variable 'gadiformes_tot' (double) with 7 unique values and 0% NA
#>         new variable 'gobiidae_tot' (double) with 165 unique values and 0% NA
#>         new variable 'mysidae_tot' (double) with 445 unique values and 0% NA
#>         new variable 'non_bio_tot' (double) with 66 unique values and 0% NA
#>         new variable 'other_crustacea_tot' (double) with 368 unique values and 0% NA
#>         new variable 'other_tot' (double) with 310 unique values and 0% NA
#>         new variable 'other_pisces_tot' (double) with 474 unique values and 0% NA
#>         new variable 'platichthys_flesus_tot' (double) with 17 unique values and 0% NA
#>         new variable 'polychaeta_tot' (double) with 738 unique values and 0% NA
#>         new variable 'saduria_entomon_tot' (double) with 784 unique values and 0% NA
#>         new variable 'sprattus_sprattus_tot' (double) with 1,295 unique values and 0% NA

# 16 prey groups in total

length(unique(d$Unique.pred.id))
#> [1] 12310
nrow(d_wide2) # The reason they differ in length (nrow) is because I remove NA prey weight
#> [1] 11189
length(unique(drop_na(d, Prey.weight)$Unique.pred.id))
#> drop_na: removed 1,216 rows (5%), 24,419 rows remaining
#> [1] 11189

# Select only columns aggregated columns (ending with _tot)
data.frame(d_wide2[1, c(1, 118:133)])
#>                                     unique_pred_id amphipoda_tot bivalvia_tot
#> 1 1963_1_3_1963_3_18_3_MAZ_41G8_22_NA_500_NA_1_COD             0            0
#>   clupeidae_tot clupea_harengus_tot gadus_morhua_tot gadiformes_tot
#> 1             0                   0                0              0
#>   gobiidae_tot mysidae_tot non_bio_tot other_crustacea_tot other_tot
#> 1            0           0           0                   0         0
#>   other_pisces_tot platichthys_flesus_tot polychaeta_tot saduria_entomon_tot
#> 1                0                      0              0               18.27
#>   sprattus_sprattus_tot
#> 1                     0
d_wide3 <- d_wide2 %>% dplyr::select(c(1, 118:133))

sort(colnames(d_wide3))
#>  [1] "amphipoda_tot"          "bivalvia_tot"           "clupea_harengus_tot"   
#>  [4] "clupeidae_tot"          "gadiformes_tot"         "gadus_morhua_tot"      
#>  [7] "gobiidae_tot"           "mysidae_tot"            "non_bio_tot"           
#> [10] "other_crustacea_tot"    "other_pisces_tot"       "other_tot"             
#> [13] "platichthys_flesus_tot" "polychaeta_tot"         "saduria_entomon_tot"   
#> [16] "sprattus_sprattus_tot"  "unique_pred_id"

# Add back in other information 
d_sub <- d %>%
  dplyr::select(Year, Quarter, Cruise, HN, Sample, Predator.code, X, Y, Lat, Long, Ices.rect,
                Pred.size.mm, Pred.weight.g, Unique.pred.id, source) %>% 
  rename("Predator.spec" = "Predator.code") %>% 
  distinct(Unique.pred.id, .keep_all = TRUE) %>%
  janitor::clean_names()  
#> rename: renamed one variable (Predator.spec)
#> distinct: removed 13,325 rows (52%), 12,310 rows remaining

d_wide4 <- left_join(d_wide3, d_sub) # Why missing 1,121 IDs? I lose them when making d_wide, I filter non-NA prey weights!
#> Joining, by = "unique_pred_id"
#> left_join: added 14 columns (year, quarter, cruise, hn, sample, …)
#>            > rows only in x        0
#>            > rows only in y  ( 1,121)
#>            > matched rows     11,189
#>            >                 ========
#>            > rows total       11,189
# missing_ids <- unique(d_sub$unique_pred_id)[!unique(d_sub$unique_pred_id) %in% unique(d_wide3$unique_pred_id)]
# d %>% filter(Unique.pred.id %in% missing_ids) %>% distinct(Prey.weight)

colnames(d_wide4)
#>  [1] "unique_pred_id"         "amphipoda_tot"          "bivalvia_tot"          
#>  [4] "clupeidae_tot"          "clupea_harengus_tot"    "gadus_morhua_tot"      
#>  [7] "gadiformes_tot"         "gobiidae_tot"           "mysidae_tot"           
#> [10] "non_bio_tot"            "other_crustacea_tot"    "other_tot"             
#> [13] "other_pisces_tot"       "platichthys_flesus_tot" "polychaeta_tot"        
#> [16] "saduria_entomon_tot"    "sprattus_sprattus_tot"  "year"                  
#> [19] "quarter"                "cruise"                 "hn"                    
#> [22] "sample"                 "predator_spec"          "x"                     
#> [25] "y"                      "lat"                    "long"                  
#> [28] "ices_rect"              "pred_size_mm"           "pred_weight_g"         
#> [31] "source"
data.frame(d_wide4[1, 2:17])
#>   amphipoda_tot bivalvia_tot clupeidae_tot clupea_harengus_tot gadus_morhua_tot
#> 1             0            0             0                   0                0
#>   gadiformes_tot gobiidae_tot mysidae_tot non_bio_tot other_crustacea_tot
#> 1              0            0           0           0                   0
#>   other_tot other_pisces_tot platichthys_flesus_tot polychaeta_tot
#> 1         0                0                      0              0
#>   saduria_entomon_tot sprattus_sprattus_tot
#> 1               18.27                     0
d_wide4 <- d_wide4 %>% mutate(tot_prey_biom = rowSums(.[2:17]))
#> mutate: new variable 'tot_prey_biom' (double) with 3,239 unique values and 0% NA

d_wide4 %>% group_by(unique_pred_id) %>% mutate(n = n()) %>% ungroup() %>% distinct(n)
#> group_by: one grouping variable (unique_pred_id)
#> mutate (grouped): new variable 'n' (integer) with one unique value and 0% NA
#> ungroup: no grouping variables
#> distinct: removed 11,188 rows (>99%), one row remaining
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1     1

# Split data by species
fle <- d_wide4 %>%
  filter(predator_spec == "FLE") %>% 
  mutate(pred_cm = pred_size_mm/10,
         pred_cm_class = cut(pred_cm, breaks = c(0, 9, 20, 30, 200), right = TRUE))
#> filter: removed 8,932 rows (80%), 2,257 rows remaining
#> mutate: new variable 'pred_cm' (double) with 113 unique values and <1% NA
#>         new variable 'pred_cm_class' (factor) with 5 unique values and <1% NA

head(data.frame(fle), 20)
#>         unique_pred_id amphipoda_tot bivalvia_tot clupeidae_tot
#> 1    2015_2_1010_4_FLE             0         4.32             0
#> 2  2015_2_1010_402_FLE             0         0.00             0
#> 3     2015_2_108_3_FLE             0         2.09             0
#> 4   2015_2_202_403_FLE             0         9.59             0
#> 5     2015_2_203_1_FLE             0         2.31             0
#> 6     2015_2_204_2_FLE             0         7.33             0
#> 7    2015_2_501_10_FLE             0         5.52             0
#> 8    2015_2_501_60_FLE             0         0.66             0
#> 9     2015_2_501_8_FLE             0         1.17             0
#> 10    2015_2_501_9_FLE             0         0.26             0
#> 11    2015_2_504_5_FLE             0         0.00             0
#> 12   2015_2_504_58_FLE             0        10.43             0
#> 13    2015_2_504_6_FLE             0        11.53             0
#> 14    2015_2_504_7_FLE             0         1.25             0
#> 15   2015_2_506_11_FLE             0         0.05             0
#> 16   2015_2_506_12_FLE             0         0.00             0
#> 17   2015_2_506_13_FLE             0        23.42             0
#> 18   2015_2_506_14_FLE             0        12.96             0
#> 19   2015_2_506_15_FLE             0        16.14             0
#> 20   2015_2_506_16_FLE             0        31.49             0
#>    clupea_harengus_tot gadus_morhua_tot gadiformes_tot gobiidae_tot mysidae_tot
#> 1                    0                0              0            0           0
#> 2                    0                0              0            0           0
#> 3                    0                0              0            0           0
#> 4                    0                0              0            0           0
#> 5                    0                0              0            0           0
#> 6                    0                0              0            0           0
#> 7                    0                0              0            0           0
#> 8                    0                0              0            0           0
#> 9                    0                0              0            0           0
#> 10                   0                0              0            0           0
#> 11                   0                0              0            0           0
#> 12                   0                0              0            0           0
#> 13                   0                0              0            0           0
#> 14                   0                0              0            0           0
#> 15                   0                0              0            0           0
#> 16                   0                0              0            0           0
#> 17                   0                0              0            0           0
#> 18                   0                0              0            0           0
#> 19                   0                0              0            0           0
#> 20                   0                0              0            0           0
#>    non_bio_tot other_crustacea_tot other_tot other_pisces_tot
#> 1         0.19                0.00      0.00             2.93
#> 2         0.00                0.00      0.00             0.00
#> 3         0.00                0.00      0.00             0.00
#> 4         0.13                0.00      0.00             0.00
#> 5         0.00                0.00      0.00             0.00
#> 6         0.00                0.00      0.00             0.00
#> 7         0.00                0.00      0.00             0.00
#> 8         0.00                0.00      0.00             0.00
#> 9         0.00                0.01      0.03             0.00
#> 10        0.00                0.00      0.00             0.00
#> 11        0.00                0.02      0.00             0.00
#> 12        0.00                0.00      0.02             0.00
#> 13        0.00                0.00      0.00             0.00
#> 14        0.00                0.00      0.00             0.00
#> 15        0.00                0.00      0.00             0.00
#> 16        0.00                0.10      0.00             0.00
#> 17        0.00                0.00      0.00             0.00
#> 18        0.00                0.00      0.00             0.00
#> 19        0.00                0.00      0.00             0.00
#> 20        0.00                0.00      0.00             0.00
#>    platichthys_flesus_tot polychaeta_tot saduria_entomon_tot
#> 1                       0              0                0.00
#> 2                       0              0                0.00
#> 3                       0              0                5.61
#> 4                       0              0                1.02
#> 5                       0              0                3.10
#> 6                       0              0                2.98
#> 7                       0              0                0.00
#> 8                       0              0                0.00
#> 9                       0              0                0.00
#> 10                      0              0                0.00
#> 11                      0              0                0.08
#> 12                      0              0                0.00
#> 13                      0              0                0.00
#> 14                      0              0                0.00
#> 15                      0              0                0.00
#> 16                      0              0                0.00
#> 17                      0              0                0.00
#> 18                      0              0                0.00
#> 19                      0              0                0.00
#> 20                      0              0                0.00
#>    sprattus_sprattus_tot year quarter  cruise   hn sample predator_spec
#> 1                      0 2015       2 INSPIRE 1010      4           FLE
#> 2                      0 2015       2 INSPIRE 1010    402           FLE
#> 3                      0 2015       2 INSPIRE  108      3           FLE
#> 4                      0 2015       2 INSPIRE  202    403           FLE
#> 5                      0 2015       2 INSPIRE  203      1           FLE
#> 6                      0 2015       2 INSPIRE  204      2           FLE
#> 7                      0 2015       2 INSPIRE  501     10           FLE
#> 8                      0 2015       2 INSPIRE  501     60           FLE
#> 9                      0 2015       2 INSPIRE  501      8           FLE
#> 10                     0 2015       2 INSPIRE  501      9           FLE
#> 11                     0 2015       2 INSPIRE  504      5           FLE
#> 12                     0 2015       2 INSPIRE  504     58           FLE
#> 13                     0 2015       2 INSPIRE  504      6           FLE
#> 14                     0 2015       2 INSPIRE  504      7           FLE
#> 15                     0 2015       2 INSPIRE  506     11           FLE
#> 16                     0 2015       2 INSPIRE  506     12           FLE
#> 17                     0 2015       2 INSPIRE  506     13           FLE
#> 18                     0 2015       2 INSPIRE  506     14           FLE
#> 19                     0 2015       2 INSPIRE  506     15           FLE
#> 20                     0 2015       2 INSPIRE  506     16           FLE
#>           x        y   lat  long ices_rect pred_size_mm pred_weight_g
#> 1  486.2897 6209.440 56.03 14.78      41G4          270           236
#> 2  486.2897 6209.440 56.03 14.78      41G4          304           476
#> 3  483.1607 6206.112 56.00 14.73      41G4          350           334
#> 4  485.6369 6200.539 55.95 14.77      40G4          302           264
#> 5  486.9129 6209.438 56.03 14.79      41G4          299           246
#> 6  485.6517 6204.990 55.99 14.77      40G4          346           346
#> 7  478.0250 6177.198 55.74 14.65      40G4          314           305
#> 8  478.0250 6177.198 55.74 14.65      40G4          330           548
#> 9  478.0250 6177.198 55.74 14.65      40G4          202            89
#> 10 478.0250 6177.198 55.74 14.65      40G4          298           251
#> 11 477.9969 6171.634 55.69 14.65      40G4          352           325
#> 12 477.9969 6171.634 55.69 14.65      40G4          313           319
#> 13 477.9969 6171.634 55.69 14.65      40G4          328           367
#> 14 477.9969 6171.634 55.69 14.65      40G4          250           172
#> 15 479.2754 6176.079 55.73 14.67      40G4          299           196
#> 16 479.2754 6176.079 55.73 14.67      40G4          280           184
#> 17 479.2754 6176.079 55.73 14.67      40G4          349           389
#> 18 479.2754 6176.079 55.73 14.67      40G4          335           368
#> 19 479.2754 6176.079 55.73 14.67      40G4          340           371
#> 20 479.2754 6176.079 55.73 14.67      40G4          337           380
#>         source tot_prey_biom pred_cm pred_cm_class
#> 1  Kevin's MSC          7.44    27.0       (20,30]
#> 2  Kevin's MSC          0.00    30.4      (30,200]
#> 3  Kevin's MSC          7.70    35.0      (30,200]
#> 4  Kevin's MSC         10.74    30.2      (30,200]
#> 5  Kevin's MSC          5.41    29.9       (20,30]
#> 6  Kevin's MSC         10.31    34.6      (30,200]
#> 7  Kevin's MSC          5.52    31.4      (30,200]
#> 8  Kevin's MSC          0.66    33.0      (30,200]
#> 9  Kevin's MSC          1.21    20.2       (20,30]
#> 10 Kevin's MSC          0.26    29.8       (20,30]
#> 11 Kevin's MSC          0.10    35.2      (30,200]
#> 12 Kevin's MSC         10.45    31.3      (30,200]
#> 13 Kevin's MSC         11.53    32.8      (30,200]
#> 14 Kevin's MSC          1.25    25.0       (20,30]
#> 15 Kevin's MSC          0.05    29.9       (20,30]
#> 16 Kevin's MSC          0.10    28.0       (20,30]
#> 17 Kevin's MSC         23.42    34.9      (30,200]
#> 18 Kevin's MSC         12.96    33.5      (30,200]
#> 19 Kevin's MSC         16.14    34.0      (30,200]
#> 20 Kevin's MSC         31.49    33.7      (30,200]
unique(fle$pred_cm_class)
#> [1] (20,30]  (30,200] (9,20]   (0,9]    <NA>    
#> Levels: (0,9] (9,20] (20,30] (30,200]

cod <- d_wide4 %>%
  filter(predator_spec == "COD") %>% 
  mutate(pred_cm = pred_size_mm/10,
         pred_cm_class = cut(pred_cm, breaks = c(0, 6, 20, 30, 40, 50, 200), right = TRUE))
#> filter: removed 2,257 rows (20%), 8,932 rows remaining
#> mutate: new variable 'pred_cm' (double) with 224 unique values and <1% NA
#>         new variable 'pred_cm_class' (factor) with 7 unique values and 3% NA

head(data.frame(cod), 20)
#>                                      unique_pred_id amphipoda_tot bivalvia_tot
#> 1  1963_1_3_1963_3_18_3_MAZ_41G8_22_NA_500_NA_1_COD             0            0
#> 2  1963_1_3_1963_3_18_3_MAZ_41G8_39_NA_450_NA_1_COD             0            0
#> 3  1963_1_3_1963_3_18_3_MAZ_41G8_41_NA_560_NA_1_COD             0            0
#> 4  1963_1_3_1963_3_18_3_MAZ_41G8_42_NA_390_NA_1_COD             0            0
#> 5  1963_1_3_1963_3_18_3_MAZ_41G8_45_NA_270_NA_1_COD             0            0
#> 6  1963_1_3_1963_3_18_3_MAZ_41G8_47_NA_270_NA_1_COD             0            0
#> 7  1963_1_3_1963_3_18_3_MAZ_41G8_51_NA_320_NA_1_COD             0            0
#> 8   1963_1_5_1963_3_20_5_MAZ_44G9_1_NA_650_NA_1_COD             0            0
#> 9  1963_1_5_1963_3_20_5_MAZ_44G9_10_NA_390_NA_1_COD             0            0
#> 10 1963_1_5_1963_3_20_5_MAZ_44G9_11_NA_210_NA_1_COD             0            0
#> 11 1963_1_5_1963_3_20_5_MAZ_44G9_13_NA_220_NA_1_COD             0            0
#> 12 1963_1_5_1963_3_20_5_MAZ_44G9_14_NA_240_NA_1_COD             0            0
#> 13 1963_1_5_1963_3_20_5_MAZ_44G9_16_NA_230_NA_1_COD             0            0
#> 14 1963_1_5_1963_3_20_5_MAZ_44G9_17_NA_230_NA_1_COD             0            0
#> 15 1963_1_5_1963_3_20_5_MAZ_44G9_18_NA_160_NA_1_COD             0            0
#> 16 1963_1_5_1963_3_20_5_MAZ_44G9_20_NA_310_NA_1_COD             0            0
#> 17 1963_1_5_1963_3_20_5_MAZ_44G9_21_NA_240_NA_1_COD             0            0
#> 18 1963_1_5_1963_3_20_5_MAZ_44G9_22_NA_310_NA_1_COD             0            0
#> 19 1963_1_5_1963_3_20_5_MAZ_44G9_23_NA_410_NA_3_COD             0            0
#> 20 1963_1_5_1963_3_20_5_MAZ_44G9_27_NA_260_NA_1_COD             0            0
#>    clupeidae_tot clupea_harengus_tot gadus_morhua_tot gadiformes_tot
#> 1              0                0.00                0              0
#> 2              0                0.00                0              0
#> 3              0                0.00                0              0
#> 4              0                0.00                0              0
#> 5              0                0.00                0              0
#> 6              0                0.00                0              0
#> 7              0                0.00                0              0
#> 8              0               48.77                0              0
#> 9              0                0.00                0              0
#> 10             0                0.00                0              0
#> 11             0                0.00                0              0
#> 12             0                0.00                0              0
#> 13             0                0.00                0              0
#> 14             0                0.00                0              0
#> 15             0                0.00                0              0
#> 16             0                0.00                0              0
#> 17             0                0.00                0              0
#> 18             0                0.00                0              0
#> 19             0                4.17                0              0
#> 20             0                0.00                0              0
#>    gobiidae_tot mysidae_tot non_bio_tot other_crustacea_tot other_tot
#> 1             0           0           0                0.00         0
#> 2             0           0           0                0.00         0
#> 3             0           0           0                0.00         0
#> 4             0           0           0                0.00         0
#> 5             0           0           0                0.00         0
#> 6             0           0           0                0.00         0
#> 7             0           0           0                3.34         0
#> 8             0           0           0                0.00         0
#> 9             0           0           0                0.00         0
#> 10            0           0           0                0.10         0
#> 11            0           0           0                0.85         0
#> 12            0           0           0                0.97         0
#> 13            0           0           0                0.82         0
#> 14            0           0           0                0.32         0
#> 15            0           0           0                0.87         0
#> 16            0           0           0                1.00         0
#> 17            0           0           0                0.00         0
#> 18            0           0           0                0.00         0
#> 19            0           0           0                0.00         0
#> 20            0           0           0                0.00         0
#>    other_pisces_tot platichthys_flesus_tot polychaeta_tot saduria_entomon_tot
#> 1                 0                      0            0.0               18.27
#> 2                 0                      0            1.2                5.62
#> 3                 0                      0            0.0                3.09
#> 4                 0                      0            0.0               12.60
#> 5                 0                      0            0.0                2.32
#> 6                 0                      0            0.0                1.50
#> 7                 0                      0            0.0                1.76
#> 8                 0                      0            0.0                0.00
#> 9                 0                      0            0.0                5.90
#> 10                0                      0            0.0                0.67
#> 11                0                      0            0.0                1.54
#> 12                0                      0            0.0                0.00
#> 13                0                      0            0.0                0.00
#> 14                0                      0            0.0                0.00
#> 15                0                      0            0.0                0.00
#> 16                0                      0            0.0                0.45
#> 17                0                      0            0.0                4.60
#> 18                0                      0            0.0                3.37
#> 19                0                      0            0.0                0.00
#> 20                0                      0            0.0                2.85
#>    sprattus_sprattus_tot year quarter  cruise hn
#> 1                      0 1963       1 No name  3
#> 2                      0 1963       1 No name  3
#> 3                      0 1963       1 No name  3
#> 4                      0 1963       1 No name  3
#> 5                      0 1963       1 No name  3
#> 6                      0 1963       1 No name  3
#> 7                      0 1963       1 No name  3
#> 8                      0 1963       1 No name  5
#> 9                      0 1963       1 No name  5
#> 10                     0 1963       1 No name  5
#> 11                     0 1963       1 No name  5
#> 12                     0 1963       1 No name  5
#> 13                     0 1963       1 No name  5
#> 14                     0 1963       1 No name  5
#> 15                     0 1963       1 No name  5
#> 16                     0 1963       1 No name  5
#> 17                     0 1963       1 No name  5
#> 18                     0 1963       1 No name  5
#> 19                     0 1963       1 No name  5
#> 20                     0 1963       1 No name  5
#>                                 sample predator_spec        x        y   lat
#> 1  1963_3_18_3_MAZ_41G8_22_NA_500_NA_1           COD 716.8245 6239.414 56.25
#> 2  1963_3_18_3_MAZ_41G8_39_NA_450_NA_1           COD 716.8245 6239.414 56.25
#> 3  1963_3_18_3_MAZ_41G8_41_NA_560_NA_1           COD 716.8245 6239.414 56.25
#> 4  1963_3_18_3_MAZ_41G8_42_NA_390_NA_1           COD 716.8245 6239.414 56.25
#> 5  1963_3_18_3_MAZ_41G8_45_NA_270_NA_1           COD 716.8245 6239.414 56.25
#> 6  1963_3_18_3_MAZ_41G8_47_NA_270_NA_1           COD 716.8245 6239.414 56.25
#> 7  1963_3_18_3_MAZ_41G8_51_NA_320_NA_1           COD 716.8245 6239.414 56.25
#> 8   1963_3_20_5_MAZ_44G9_1_NA_650_NA_1           COD 767.7241 6409.776 57.75
#> 9  1963_3_20_5_MAZ_44G9_10_NA_390_NA_1           COD 767.7241 6409.776 57.75
#> 10 1963_3_20_5_MAZ_44G9_11_NA_210_NA_1           COD 767.7241 6409.776 57.75
#> 11 1963_3_20_5_MAZ_44G9_13_NA_220_NA_1           COD 767.7241 6409.776 57.75
#> 12 1963_3_20_5_MAZ_44G9_14_NA_240_NA_1           COD 767.7241 6409.776 57.75
#> 13 1963_3_20_5_MAZ_44G9_16_NA_230_NA_1           COD 767.7241 6409.776 57.75
#> 14 1963_3_20_5_MAZ_44G9_17_NA_230_NA_1           COD 767.7241 6409.776 57.75
#> 15 1963_3_20_5_MAZ_44G9_18_NA_160_NA_1           COD 767.7241 6409.776 57.75
#> 16 1963_3_20_5_MAZ_44G9_20_NA_310_NA_1           COD 767.7241 6409.776 57.75
#> 17 1963_3_20_5_MAZ_44G9_21_NA_240_NA_1           COD 767.7241 6409.776 57.75
#> 18 1963_3_20_5_MAZ_44G9_22_NA_310_NA_1           COD 767.7241 6409.776 57.75
#> 19 1963_3_20_5_MAZ_44G9_23_NA_410_NA_3           COD 767.7241 6409.776 57.75
#> 20 1963_3_20_5_MAZ_44G9_27_NA_260_NA_1           COD 767.7241 6409.776 57.75
#>    long ices_rect pred_size_mm pred_weight_g      source tot_prey_biom pred_cm
#> 1  18.5      41G8          500            NA Kevin's MSC         18.27      50
#> 2  18.5      41G8          450            NA Kevin's MSC          6.82      45
#> 3  18.5      41G8          560            NA Kevin's MSC          3.09      56
#> 4  18.5      41G8          390            NA Kevin's MSC         12.60      39
#> 5  18.5      41G8          270            NA Kevin's MSC          2.32      27
#> 6  18.5      41G8          270            NA Kevin's MSC          1.50      27
#> 7  18.5      41G8          320            NA Kevin's MSC          5.10      32
#> 8  19.5      44G9          650            NA Kevin's MSC         48.77      65
#> 9  19.5      44G9          390            NA Kevin's MSC          5.90      39
#> 10 19.5      44G9          210            NA Kevin's MSC          0.77      21
#> 11 19.5      44G9          220            NA Kevin's MSC          2.39      22
#> 12 19.5      44G9          240            NA Kevin's MSC          0.97      24
#> 13 19.5      44G9          230            NA Kevin's MSC          0.82      23
#> 14 19.5      44G9          230            NA Kevin's MSC          0.32      23
#> 15 19.5      44G9          160            NA Kevin's MSC          0.87      16
#> 16 19.5      44G9          310            NA Kevin's MSC          1.45      31
#> 17 19.5      44G9          240            NA Kevin's MSC          4.60      24
#> 18 19.5      44G9          310            NA Kevin's MSC          3.37      31
#> 19 19.5      44G9          410            NA Kevin's MSC          4.17      41
#> 20 19.5      44G9          260            NA Kevin's MSC          2.85      26
#>    pred_cm_class
#> 1        (40,50]
#> 2        (40,50]
#> 3       (50,200]
#> 4        (30,40]
#> 5        (20,30]
#> 6        (20,30]
#> 7        (30,40]
#> 8       (50,200]
#> 9        (30,40]
#> 10       (20,30]
#> 11       (20,30]
#> 12       (20,30]
#> 13       (20,30]
#> 14       (20,30]
#> 15        (6,20]
#> 16       (30,40]
#> 17       (20,30]
#> 18       (30,40]
#> 19       (40,50]
#> 20       (20,30]
unique(cod$pred_cm_class)
#> [1] (40,50]  (50,200] (30,40]  (20,30]  (6,20]   <NA>     (0,6]   
#> Levels: (0,6] (6,20] (20,30] (30,40] (40,50] (50,200]

Find which prey are shared for cod and flounder and explore data more

# First subset data to new and old (years with flounder data and without)
# Plot and calculate proportion empty stomachs
cod %>% 
  mutate(empty_stomach = ifelse(tot_prey_biom == 0, "Y", "N")) %>% 
  ggplot(., aes(empty_stomach, fill = empty_stomach)) + 
  scale_fill_brewer(palette = "Set2") +
  geom_bar()
#> mutate: new variable 'empty_stomach' (character) with 2 unique values and 0% NA


# Plot "stomach" fullness, i.e., the weight of prey in stomach (relative to predator weight)
# Can also be called Feeding Ratio, FR
t <- cod %>% drop_na(pred_weight_g)
#> drop_na: removed 3,384 rows (38%), 5,548 rows remaining
t <- cod %>% drop_na(pred_cm)
#> drop_na: removed 24 rows (<1%), 8,908 rows remaining

# Next find the most abundance prey groups for cod and flounder (to see if the density has any effect
# on the biomass of common prey in their stomachs)
# To do that, I need long format again, group by prey item and summarize
colnames(cod)
#>  [1] "unique_pred_id"         "amphipoda_tot"          "bivalvia_tot"          
#>  [4] "clupeidae_tot"          "clupea_harengus_tot"    "gadus_morhua_tot"      
#>  [7] "gadiformes_tot"         "gobiidae_tot"           "mysidae_tot"           
#> [10] "non_bio_tot"            "other_crustacea_tot"    "other_tot"             
#> [13] "other_pisces_tot"       "platichthys_flesus_tot" "polychaeta_tot"        
#> [16] "saduria_entomon_tot"    "sprattus_sprattus_tot"  "year"                  
#> [19] "quarter"                "cruise"                 "hn"                    
#> [22] "sample"                 "predator_spec"          "x"                     
#> [25] "y"                      "lat"                    "long"                  
#> [28] "ices_rect"              "pred_size_mm"           "pred_weight_g"         
#> [31] "source"                 "tot_prey_biom"          "pred_cm"               
#> [34] "pred_cm_class"
cod_important_prey <- cod %>%
  filter(year > 2014) %>% # Compare only recent data!
  pivot_longer(2:17) %>% 
  group_by(name) %>% 
  summarise(tot_prey = sum(value)) %>% 
  mutate(percent = round(tot_prey / sum(tot_prey), digits = 5)*100) %>%  # calculate also percent of total
  arrange(desc(tot_prey)) %>% 
  mutate(spec = "cod")
#> filter: removed 5,203 rows (58%), 3,729 rows remaining
#> pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 3729x34, now 59664x20]
#> group_by: one grouping variable (name)
#> summarise: now 16 rows and 2 columns, ungrouped
#> mutate: new variable 'percent' (double) with 16 unique values and 0% NA
#> mutate: new variable 'spec' (character) with one unique value and 0% NA

# Same for flounder
fle_important_prey <- fle %>%
  pivot_longer(2:17) %>% 
  group_by(name) %>% 
  summarise(tot_prey = sum(value)) %>% 
  mutate(percent = round(tot_prey / sum(tot_prey), digits = 2)*100) %>%  # calculate also percent of total
  arrange(desc(tot_prey)) %>% 
  mutate(spec = "fle")
#> pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 2257x34, now 36112x20]
#> group_by: one grouping variable (name)
#> summarise: now 16 rows and 2 columns, ungrouped
#> mutate: new variable 'percent' (double) with 8 unique values and 0% NA
#> mutate: new variable 'spec' (character) with one unique value and 0% NA

plotdat <- bind_rows(cod_important_prey, fle_important_prey) %>% filter(percent > 0)
#> filter: removed 8 rows (25%), 24 rows remaining

ggplot(plotdat, aes(reorder(name, desc(percent)), percent, fill = spec)) +
  geom_bar(stat = "identity") + 
  theme_classic(base_size = 16) + 
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_brewer(palette = "Set2") + 
  NULL


# Ok, so the species that both prey feed on (even though very little(!)) are:
plotdat %>% arrange(desc(percent)) %>% as.data.frame()
#>                      name tot_prey percent spec
#> 1            bivalvia_tot 3342.430  54.000  fle
#> 2   sprattus_sprattus_tot 8874.500  38.624  cod
#> 3     clupea_harengus_tot 8001.690  34.826  cod
#> 4     saduria_entomon_tot 1609.480  26.000  fle
#> 5               other_tot  488.390   8.000  fle
#> 6        gadus_morhua_tot 1734.260   7.548  cod
#> 7        other_pisces_tot 1712.130   7.452  cod
#> 8     other_crustacea_tot  284.150   5.000  fle
#> 9           clupeidae_tot  835.020   3.634  cod
#> 10  sprattus_sprattus_tot  177.240   3.000  fle
#> 11            mysidae_tot  468.370   2.038  cod
#> 12          amphipoda_tot  109.040   2.000  fle
#> 13    saduria_entomon_tot  371.470   1.617  cod
#> 14           gobiidae_tot  269.600   1.173  cod
#> 15    other_crustacea_tot  261.320   1.137  cod
#> 16       other_pisces_tot   66.470   1.000  fle
#> 17         polychaeta_tot   59.370   1.000  fle
#> 18 platichthys_flesus_tot  210.440   0.916  cod
#> 19         polychaeta_tot  102.460   0.446  cod
#> 20              other_tot   47.970   0.209  cod
#> 21          amphipoda_tot   45.935   0.200  cod
#> 22            non_bio_tot   28.930   0.126  cod
#> 23           bivalvia_tot   10.330   0.045  cod
#> 24         gadiformes_tot    2.080   0.009  cod

Calculate response variables

# Total feeding ratio, proportion of saduria and proportion of common prey!
# Cod
cod <- cod %>% 
  mutate(pred_weight_g = replace_na(pred_weight_g, -9),
         pred_weight_g = ifelse(pred_weight_g == -9, 0.01*pred_cm^3, pred_weight_g), 
         FR_tot = (tot_prey_biom)/(pred_weight_g - tot_prey_biom),
         FR_sad = (saduria_entomon_tot)/(pred_weight_g - tot_prey_biom), # important I remove tot prey here!
         FR_spr = (sprattus_sprattus_tot)/(pred_weight_g - tot_prey_biom), # important I remove tot prey here!
         prop_saduria = saduria_entomon_tot/tot_prey_biom,
         prop_common = (amphipoda_tot + clupea_harengus_tot + clupeidae_tot + other_crustacea_tot + 
                          other_pisces_tot + polychaeta_tot + saduria_entomon_tot + sprattus_sprattus_tot) / tot_prey_biom,
         common_tot = (amphipoda_tot + clupea_harengus_tot + clupeidae_tot + other_crustacea_tot +
                        other_pisces_tot + polychaeta_tot + saduria_entomon_tot + sprattus_sprattus_tot)) %>% 
  filter(FR_tot > -1) # remove negative values of FR
#> mutate: changed 3,360 values (38%) of 'pred_weight_g' (3360 fewer NA)
#>         new variable 'FR_tot' (double) with 7,321 unique values and <1% NA
#>         new variable 'FR_sad' (double) with 1,813 unique values and <1% NA
#>         new variable 'FR_spr' (double) with 1,588 unique values and <1% NA
#>         new variable 'prop_saduria' (double) with 1,192 unique values and 4% NA
#>         new variable 'prop_common' (double) with 1,652 unique values and 4% NA
#>         new variable 'common_tot' (double) with 2,749 unique values and 0% NA
#> filter: removed 249 rows (3%), 8,683 rows remaining

# Flounder  
fle <- fle %>% 
  mutate(pred_weight_g = replace_na(pred_weight_g, -9),
         pred_weight_g = ifelse(pred_weight_g == -9, 0.01*pred_cm^3, pred_weight_g), 
         FR_tot = (tot_prey_biom)/(pred_weight_g - tot_prey_biom), 
         FR_sad = (saduria_entomon_tot)/(pred_weight_g - tot_prey_biom), # important I remove tot prey here!
         FR_spr = (sprattus_sprattus_tot)/(pred_weight_g - tot_prey_biom), # important I remove tot prey here!
         prop_saduria = saduria_entomon_tot/tot_prey_biom,
         prop_common = (amphipoda_tot + clupea_harengus_tot + clupeidae_tot + other_crustacea_tot + 
                          other_pisces_tot + polychaeta_tot + saduria_entomon_tot + sprattus_sprattus_tot) / tot_prey_biom,
         common_tot = (amphipoda_tot + clupea_harengus_tot + clupeidae_tot + other_crustacea_tot +
                        other_pisces_tot + polychaeta_tot + saduria_entomon_tot + sprattus_sprattus_tot)) %>% 
  filter(FR_tot > -1) # remove negative values of FR
#> mutate: changed 911 values (40%) of 'pred_weight_g' (911 fewer NA)
#>         new variable 'FR_tot' (double) with 1,751 unique values and <1% NA
#>         new variable 'FR_sad' (double) with 641 unique values and <1% NA
#>         new variable 'FR_spr' (double) with 41 unique values and <1% NA
#>         new variable 'prop_saduria' (double) with 467 unique values and 19% NA
#>         new variable 'prop_common' (double) with 916 unique values and 19% NA
#>         new variable 'common_tot' (double) with 495 unique values and 0% NA
#> filter: removed 7 rows (<1%), 2,250 rows remaining

# Plot FR_tot for all years
ggplot(cod, aes(year, FR_tot)) + 
  geom_point(size = 3, shape = 21, color = "white", fill = "gray30") + 
  stat_smooth() + 
  facet_wrap(~quarter) + 
  theme_classic(base_size = 16) +
  NULL
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'


# Check individual stomachs with high FR values... 0.5 seems like a reasonable cutoff
cod <- cod %>% filter(FR_tot < 0.5) %>% as.data.frame()
#> filter: removed 12 rows (<1%), 8,671 rows remaining
ggplot(cod, aes(year, FR_tot)) + 
  geom_point(size = 2, shape = 21, color = "white", fill = "gray30") + 
  facet_wrap(~quarter) + 
  NULL


# Check flounder
fle <- fle %>% filter(FR_tot < 0.2) %>% as.data.frame()
#> filter: removed 2 rows (<1%), 2,248 rows remaining
ggplot(fle, aes(year, FR_tot)) + 
  geom_point(size = 2, shape = 21, color = "white", fill = "gray30") + 
  facet_wrap(~quarter) + 
  NULL


# Large gaps, group by time period and make rain-cloud plot
# https://www.cedricscherer.com/2021/06/06/visualizing-distributions-with-raincloud-plots-and-how-to-create-them-with-ggplot2/
cod <- cod %>% 
  mutate(time_period = "1963-1978",
         time_period = ifelse(year > 1980, "1986-1990", time_period),
         time_period = ifelse(year > 1990, "2006-2020", time_period)) %>% 
  mutate(quarter_fact = ifelse(quarter == 1, "Quarter 1", "Quarter 4"))
#> mutate: new variable 'time_period' (character) with 3 unique values and 0% NA
#> mutate: new variable 'quarter_fact' (character) with 2 unique values and 0% NA

p1 <- ggplot(filter(cod, quarter == 1), aes(time_period, FR_tot, color = time_period, fill = time_period)) + 
  ggdist::stat_halfeye(adjust = .5, width = 0.8, .width = 0, justification = -.1, point_colour = NA, alpha = 0.5) + 
  geom_point(shape = 21, color = "white", size = 1.3, alpha = .2, position = position_jitter(seed = 1, width = 0.1)) + 
  geom_boxplot(width = .15, outlier.shape = NA, fill = "white", alpha = 0.2) +  
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  guides(color = FALSE, fill = FALSE) +
  labs(x = "", y = "") +
  coord_cartesian(ylim = c(0, 5)) +
  coord_flip() +
  geom_text(data = cod %>% filter(quarter == 1) %>% group_by(time_period, quarter) %>% summarise(n = n()),
            aes(y = 0.25, x = time_period, label = glue::glue("n = {n}")), nudge_x = 0.3) +
  theme(legend.position = "bottom") + 
  theme_classic(base_size = 10) +
  ggtitle("Quarter 1") +
  NULL
#> filter: removed 3,675 rows (42%), 4,996 rows remaining
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
#> Coordinate system already present. Adding new coordinate system, which will replace the existing one.
#> filter: removed 3,675 rows (42%), 4,996 rows remaining
#> group_by: 2 grouping variables (time_period, quarter)
#> summarise: now 3 rows and 3 columns, one group variable remaining (time_period)

p2 <- ggplot(filter(cod, quarter == 4), aes(time_period, FR_tot, color = time_period, fill = time_period)) + 
  ggdist::stat_halfeye(adjust = .5, width = 0.8, .width = 0, justification = -.1, point_colour = NA, alpha = 0.5) + 
  geom_point(shape = 21, color = "white", size = 1.3, alpha = .2, position = position_jitter(seed = 1, width = 0.1)) + 
  geom_boxplot(width = .15, outlier.shape = NA, fill = "white", alpha = 0.2) +  
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  guides(color = FALSE, fill = FALSE) +
  labs(x = "", y = "Feeding ratio") +
  coord_cartesian(ylim = c(0, 5)) +
  coord_flip() +
  geom_text(data = cod %>% filter(quarter == 4) %>% group_by(time_period, quarter) %>% summarise(n = n()),
            aes(y = 0.25, x = time_period, label = glue::glue("n = {n}")), nudge_x = 0.3) +
  theme(legend.position = "bottom") + 
  theme_classic(base_size = 10) +
  ggtitle("Quarter 4") +
  NULL
#> filter: removed 5,087 rows (59%), 3,584 rows remaining
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
#> Coordinate system already present. Adding new coordinate system, which will replace the existing one.
#> filter: removed 5,087 rows (59%), 3,584 rows remaining
#> group_by: 2 grouping variables (time_period, quarter)
#> summarise: now 2 rows and 3 columns, one group variable remaining (time_period)

p1 / p2


# How many of the old samples don't have coordinates?
# Too many!
t <- cod %>% 
  filter(time_period == "1963-1978") %>% 
  drop_na(lat, long)
#> filter: removed 6,978 rows (80%), 1,693 rows remaining
#> drop_na: no rows removed

Add in density and depth covariates

# First read the density models and predict the values
mcod2_q1 <- readRDS("output/mcod2_q1.rds")
mcod2_q4 <- readRDS("output/mcod2_q4.rds")

mfle2_q1 <- readRDS("output/mfle2_q1.rds")
mfle2_q4 <- readRDS("output/mfle2_q4.rds")

# Add depth and rename coordinates to match the data used for fitting the density model
# Read the tifs
west <- raster("/Users/maxlindmark/Desktop/R_STUDIO_PROJECTS/cod_condition/data/depth_geo_tif/D5_2018_rgb-1.tif")
#plot(west)

east <- raster("/Users/maxlindmark/Desktop/R_STUDIO_PROJECTS/cod_condition/data/depth_geo_tif/D6_2018_rgb-1.tif")
# plot(east)

dep_rast <- raster::merge(west, east)

colnames(cod)
#>  [1] "unique_pred_id"         "amphipoda_tot"          "bivalvia_tot"          
#>  [4] "clupeidae_tot"          "clupea_harengus_tot"    "gadus_morhua_tot"      
#>  [7] "gadiformes_tot"         "gobiidae_tot"           "mysidae_tot"           
#> [10] "non_bio_tot"            "other_crustacea_tot"    "other_tot"             
#> [13] "other_pisces_tot"       "platichthys_flesus_tot" "polychaeta_tot"        
#> [16] "saduria_entomon_tot"    "sprattus_sprattus_tot"  "year"                  
#> [19] "quarter"                "cruise"                 "hn"                    
#> [22] "sample"                 "predator_spec"          "x"                     
#> [25] "y"                      "lat"                    "long"                  
#> [28] "ices_rect"              "pred_size_mm"           "pred_weight_g"         
#> [31] "source"                 "tot_prey_biom"          "pred_cm"               
#> [34] "pred_cm_class"          "FR_tot"                 "FR_sad"                
#> [37] "FR_spr"                 "prop_saduria"           "prop_common"           
#> [40] "common_tot"             "time_period"            "quarter_fact"
colnames(fle)
#>  [1] "unique_pred_id"         "amphipoda_tot"          "bivalvia_tot"          
#>  [4] "clupeidae_tot"          "clupea_harengus_tot"    "gadus_morhua_tot"      
#>  [7] "gadiformes_tot"         "gobiidae_tot"           "mysidae_tot"           
#> [10] "non_bio_tot"            "other_crustacea_tot"    "other_tot"             
#> [13] "other_pisces_tot"       "platichthys_flesus_tot" "polychaeta_tot"        
#> [16] "saduria_entomon_tot"    "sprattus_sprattus_tot"  "year"                  
#> [19] "quarter"                "cruise"                 "hn"                    
#> [22] "sample"                 "predator_spec"          "x"                     
#> [25] "y"                      "lat"                    "long"                  
#> [28] "ices_rect"              "pred_size_mm"           "pred_weight_g"         
#> [31] "source"                 "tot_prey_biom"          "pred_cm"               
#> [34] "pred_cm_class"          "FR_tot"                 "FR_sad"                
#> [37] "FR_spr"                 "prop_saduria"           "prop_common"           
#> [40] "common_tot"
cod$depth <- extract(dep_rast, cod[, 27:26])
fle$depth <- extract(dep_rast, fle[, 27:26])

# Convert to depth (instead of elevation)
ggplot(cod, aes(x, y, color = depth)) + geom_point()

cod$depth <- (cod$depth - max(drop_na(cod)$depth)) *-1
#> drop_na: removed 1,368 rows (16%), 7,303 rows remaining
ggplot(cod, aes(x, y, color = depth)) + geom_point()


fle$depth <- (fle$depth - max(drop_na(fle)$depth)) *-1
#> drop_na: removed 426 rows (19%), 1,822 rows remaining

cod <- cod %>% rename("X" = "x", "Y" = "y")
#> rename: renamed 2 variables (X, Y)
cod <- cod %>% mutate(year = as.integer(year))
#> mutate: converted 'year' from double to integer (0 new NA)

fle <- fle %>% rename("X" = "x", "Y" = "y")
#> rename: renamed 2 variables (X, Y)
fle <- fle %>% mutate(year = as.integer(year))
#> mutate: converted 'year' from double to integer (0 new NA)

# Standardize depth to the DENSITY data, not this data set
density_dat <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/cod_condition/master/data/for_analysis/mdat_cpue.csv")
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   density = col_double(),
#>   year = col_double(),
#>   lat = col_double(),
#>   lon = col_double(),
#>   quarter = col_double(),
#>   haul.id = col_character(),
#>   IDx = col_character(),
#>   ices_rect = col_character(),
#>   SD = col_double(),
#>   density_fle = col_double(),
#>   depth = col_double(),
#>   oxy = col_double(),
#>   temp = col_double(),
#>   X = col_double(),
#>   Y = col_double()
#> )

# Rename to match variables in the density model
cod <- cod %>% mutate(depth_sc = (depth - mean(density_dat$depth)) / sd(density_dat$depth))
#> mutate: new variable 'depth_sc' (double) with 106 unique values and 0% NA
fle <- fle %>% mutate(depth_sc = (depth - mean(density_dat$depth)) / sd(density_dat$depth))
#> mutate: new variable 'depth_sc' (double) with 81 unique values and 0% NA

cod_old <- cod %>% filter(year < 1992) # Filter the old data so that we can bind_rows it later
#> filter: removed 6,914 rows (80%), 1,757 rows remaining
cod <- cod %>% filter(year > 1991)
#> filter: removed 1,757 rows (20%), 6,914 rows remaining

# Filter by quarter so that the prediction data frame is correct
cod_nd_q1 <- filter(cod, quarter == 1)
#> filter: removed 3,593 rows (52%), 3,321 rows remaining
cod_nd_q4 <- filter(cod, quarter == 4)
#> filter: removed 3,412 rows (49%), 3,502 rows remaining

fle_nd_q1 <- filter(fle, quarter == 1)
#> filter: removed 915 rows (41%), 1,333 rows remaining
fle_nd_q4 <- filter(fle, quarter == 4)
#> filter: removed 1,391 rows (62%), 857 rows remaining

# Now predict using the density models
cod_stomach_predcod_density_q1 <- predict(mcod2_q1, newdata = dplyr::select(cod_nd_q1, depth_sc, year, X, Y))
cod_stomach_predcod_density_q4 <- predict(mcod2_q4, newdata = dplyr::select(cod_nd_q4, depth_sc, year, X, Y))

cod_stomach_predfle_density_q1 <- predict(mfle2_q1, newdata = dplyr::select(cod_nd_q1, depth_sc, year, X, Y))
cod_stomach_predfle_density_q4 <- predict(mfle2_q4, newdata = dplyr::select(cod_nd_q4, depth_sc, year, X, Y))

fle_stomach_predcod_density_q1 <- predict(mcod2_q1, newdata = dplyr::select(fle_nd_q1, depth_sc, year, X, Y))
fle_stomach_predcod_density_q4 <- predict(mcod2_q4, newdata = dplyr::select(fle_nd_q4, depth_sc, year, X, Y))

fle_stomach_predfle_density_q1 <- predict(mfle2_q1, newdata = dplyr::select(fle_nd_q1, depth_sc, year, X, Y))
fle_stomach_predfle_density_q4 <- predict(mfle2_q4, newdata = dplyr::select(fle_nd_q4, depth_sc, year, X, Y))

# Add predictions to the diet data
cod <- cod %>% mutate(predcod_density = ifelse(quarter == 1,
                                               exp(cod_stomach_predcod_density_q1$est),
                                               exp(cod_stomach_predcod_density_q4$est)),
                      predcod_density = ifelse(quarter == 2, NA, predcod_density),
                      predfle_density = ifelse(quarter == 1,
                                               exp(cod_stomach_predfle_density_q1$est),
                                               exp(cod_stomach_predfle_density_q4$est)),
                      predfle_density = ifelse(quarter == 2, NA, predfle_density))
#> mutate: new variable 'predcod_density' (double) with 345 unique values and 1% NA
#>         new variable 'predfle_density' (double) with 345 unique values and 1% NA

fle <- fle %>% mutate(predcod_density = ifelse(quarter == 1,
                                               exp(fle_stomach_predcod_density_q1$est),
                                               exp(fle_stomach_predcod_density_q4$est)),
                      predcod_density = ifelse(quarter == 2, NA, predcod_density),
                      predfle_density = ifelse(quarter == 1,
                                               exp(fle_stomach_predfle_density_q1$est),
                                               exp(fle_stomach_predfle_density_q4$est)),
                      predfle_density = ifelse(quarter == 2, NA, predfle_density))
#> mutate: new variable 'predcod_density' (double) with 194 unique values and 3% NA
#>         new variable 'predfle_density' (double) with 193 unique values and 3% NA

# Add scaled predicted densities to the stomach data
cod <- cod %>%
  group_by(quarter) %>% # if we include quarter as a factor, then do NOT group by quarter
  mutate(predcod_density_sc = (predcod_density - mean(predcod_density))/sd(predcod_density),
         predfle_density_sc = (predfle_density - mean(predfle_density))/sd(predfle_density))
#> group_by: one grouping variable (quarter)
#> mutate (grouped): new variable 'predcod_density_sc' (double) with 345 unique values and 1% NA
#>                   new variable 'predfle_density_sc' (double) with 345 unique values and 1% NA

fle <- fle %>%
  group_by(quarter) %>% # if we include quarter as a factor, then do NOT group by quarter
  mutate(predcod_density_sc = (predcod_density - mean(predcod_density))/sd(predcod_density),
         predfle_density_sc = (predfle_density - mean(predfle_density))/sd(predfle_density))
#> group_by: one grouping variable (quarter)
#> mutate (grouped): new variable 'predcod_density_sc' (double) with 194 unique values and 3% NA
#>                   new variable 'predfle_density_sc' (double) with 193 unique values and 3% NA

# Add the old data back using bind_rows so that density estimates get NA
cod <- bind_rows(cod, cod_old)

Add Ices divisions

# https://stackoverflow.com/questions/34272309/extract-shapefile-value-to-point-with-r
# https://gis.ices.dk/sf/
shape <- shapefile("data/ICES_StatRec_mapto_ICES_Areas/StatRec_map_Areas_Full_20170124.shp")
head(shape)
#>   OBJECTID ID ICESNAME SOUTH WEST NORTH EAST AREA_KM2 stat_x stat_y Area_27
#> 0        1 47     47A0  59.0  -44  59.5  -43     3178  -43.5  59.25  14.b.2
#> 1        2 48     48A0  59.5  -44  60.0  -43     3132  -43.5  59.75  14.b.2
#> 2        3 49     49A0  60.0  -44  60.5  -43     3085  -43.5  60.25  14.b.2
#> 3        4 50     50A0  60.5  -44  61.0  -43     3038  -43.5  60.75  14.b.2
#> 4        5 51     51A0  61.0  -44  61.5  -43     2991  -43.5  61.25  14.b.2
#> 5        6 52     52A0  61.5  -44  62.0  -43     2944  -43.5  61.75  14.b.2
#>           Perc      MaxPer RNDMaxPer AreasList Shape_Leng Shape_Area
#> 0 100.00000000 100.0000000       100    14.b.2          3        0.5
#> 1  84.12674145  84.1267414        84    14.b.2          3        0.5
#> 2  24.99803694  24.9980369        25    14.b.2          3        0.5
#> 3  11.97744244  11.9774424        12    14.b.2          3        0.5
#> 4   3.89717625   3.8971762         4    14.b.2          3        0.5
#> 5   0.28578428   0.2857843         0    14.b.2          3        0.5

plot(shape, axes = TRUE)


cod_pts <- SpatialPoints(cbind(cod$long, cod$lat), 
                         proj4string = CRS(proj4string(shape)))
#> Warning in proj4string(shape): CRS object has comment, which is lost in output

fle_pts <- SpatialPoints(cbind(fle$long, fle$lat), 
                         proj4string = CRS(proj4string(shape)))
#> Warning in proj4string(shape): CRS object has comment, which is lost in output

cod$subdiv <- over(cod_pts, shape)$Area_27
cod$subdiv2 <- over(cod_pts, shape)$AreasList

fle$subdiv <- over(fle_pts, shape)$Area_27
fle$subdiv2 <- over(fle_pts, shape)$AreasList

# Rename subdivisions to the more common names and do some more filtering (by sub div and area)
sort(unique(cod$subdiv))
#> [1] "3.d.24"   "3.d.25"   "3.d.26"   "3.d.27"   "3.d.28.2" "3.d.29"
sort(unique(fle$subdiv))
#> [1] "3.d.24"   "3.d.25"   "3.d.26"   "3.d.27"   "3.d.28.2"

cod <- cod %>% 
  mutate(SubDiv = factor(subdiv),
         SubDiv = fct_recode(subdiv,
                             "24" = "3.d.24",
                             "25" = "3.d.25",
                             "26" = "3.d.26",
                             "27" = "3.d.27",
                             "28" = "3.d.28.2"),
         SubDiv = as.character(SubDiv)) %>% 
  filter(SubDiv %in% c("24", "25", "26", "27", "28")) %>% 
  filter(lat > 54 & lat < 58 & long < 22)
#> Warning: Unknown levels in `f`: 3.d.24, 3.d.26, 3.d.27, 3.d.28.2
#> mutate (grouped): new variable 'SubDiv' (character) with 6 unique values and 0% NA
#> filter (grouped): removed 7 rows (<1%), 8,664 rows remaining
#> filter (grouped): removed 44 rows (1%), 8,620 rows remaining

fle <- fle %>% 
  mutate(SubDiv = factor(subdiv),
         SubDiv = fct_recode(subdiv,
                             "24" = "3.d.24",
                             "25" = "3.d.25",
                             "26" = "3.d.26",
                             "27" = "3.d.27",
                             "28" = "3.d.28.2",
                             "29" = "3.d.29"),
         SubDiv = as.character(SubDiv)) %>% 
  filter(SubDiv %in% c("24", "25", "26", "27", "28", "29")) %>% 
  filter(lat > 54 & lat < 58 & long < 22)
#> Warning: Unknown levels in `f`: 3.d.29
#> Warning: Unknown levels in `f`: 3.d.24, 3.d.26, 3.d.27, 3.d.28.2, 3.d.29
#> Warning: Unknown levels in `f`: 3.d.29
#> mutate (grouped): new variable 'SubDiv' (character) with 5 unique values and 0% NA
#> filter (grouped): no rows removed
#> filter (grouped): removed 10 rows (<1%), 2,238 rows remaining

Save data for further analysis

colnames(cod)
#>  [1] "unique_pred_id"         "amphipoda_tot"          "bivalvia_tot"          
#>  [4] "clupeidae_tot"          "clupea_harengus_tot"    "gadus_morhua_tot"      
#>  [7] "gadiformes_tot"         "gobiidae_tot"           "mysidae_tot"           
#> [10] "non_bio_tot"            "other_crustacea_tot"    "other_tot"             
#> [13] "other_pisces_tot"       "platichthys_flesus_tot" "polychaeta_tot"        
#> [16] "saduria_entomon_tot"    "sprattus_sprattus_tot"  "year"                  
#> [19] "quarter"                "cruise"                 "hn"                    
#> [22] "sample"                 "predator_spec"          "X"                     
#> [25] "Y"                      "lat"                    "long"                  
#> [28] "ices_rect"              "pred_size_mm"           "pred_weight_g"         
#> [31] "source"                 "tot_prey_biom"          "pred_cm"               
#> [34] "pred_cm_class"          "FR_tot"                 "FR_sad"                
#> [37] "FR_spr"                 "prop_saduria"           "prop_common"           
#> [40] "common_tot"             "time_period"            "quarter_fact"          
#> [43] "depth"                  "depth_sc"               "predcod_density"       
#> [46] "predfle_density"        "predcod_density_sc"     "predfle_density_sc"    
#> [49] "subdiv"                 "subdiv2"                "SubDiv"

cod_save <- cod %>%
  dplyr::select(FR_tot, FR_sad, FR_spr, saduria_entomon_tot, tot_prey_biom, common_tot, unique_pred_id,
                year, quarter, time_period, quarter_fact, pred_weight_g, pred_cm, predator_spec, predcod_density,
                predfle_density, predcod_density_sc, predfle_density_sc, depth, X, Y, lat, long, ices_rect,
                SubDiv, cruise)

fle_save <- fle %>%
  dplyr::select(FR_tot, FR_sad, FR_spr, saduria_entomon_tot, tot_prey_biom, common_tot, unique_pred_id,
                year, quarter, pred_weight_g, pred_cm, predator_spec, predcod_density, predfle_density,
                predcod_density_sc, predfle_density_sc, depth, X, Y, lat, long, ices_rect, SubDiv, cruise)

write.csv(cod_save, "data/cod_diet_analysis.csv")
write.csv(fle_save, "data/fle_diet_analysis.csv")

# Save for multivariate analysis (all species groups saved)
cod_save_pca <- cod %>%
  dplyr::select(unique_pred_id, year, quarter, pred_weight_g, pred_cm,
                predator_spec, depth, X, Y, lat, long, ices_rect, cruise,
                amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, 
                gadus_morhua_tot, gadiformes_tot, gobiidae_tot, mysidae_tot, non_bio_tot, 
                other_crustacea_tot, other_tot, other_pisces_tot, platichthys_flesus_tot, 
                polychaeta_tot, saduria_entomon_tot, sprattus_sprattus_tot)

fle_save_pca <- fle %>%
  dplyr::select(unique_pred_id, year, quarter, pred_weight_g, pred_cm,
                predator_spec, depth, X, Y, lat, long, ices_rect, cruise,
                amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, 
                gadus_morhua_tot, gadiformes_tot, gobiidae_tot, mysidae_tot, non_bio_tot, 
                other_crustacea_tot, other_tot, other_pisces_tot, platichthys_flesus_tot, 
                polychaeta_tot, saduria_entomon_tot, sprattus_sprattus_tot)

write.csv(cod_save_pca, "data/pca_cod_diet_analysis.csv")
write.csv(fle_save_pca, "data/pca_fle_diet_analysis.csv")

Extra: explore response variables

# How does stomach content scale with size? Probably a little!
ggplot(cod, aes(pred_cm, FR_tot, color = time_period)) +
  geom_point(alpha = 0.4) + 
  facet_wrap(~time_period) +
  stat_smooth(color = "black")
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'


ggplot(fle, aes(pred_cm, FR_tot)) +
  geom_point(alpha = 0.4) + 
  stat_smooth(color = "black")
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'


# r plot cod stomach against density
# Common prey
p1 <- ggplot(cod, aes(predcod_density_sc, prop_common)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()
  
p2 <- ggplot(cod, aes(predcod_density_sc, common_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p3 <- ggplot(cod, aes(predfle_density_sc, prop_common)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p4 <- ggplot(cod, aes(predfle_density_sc, common_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p5 <- ggplot(cod, aes(predfle_density_sc + predcod_density_sc, prop_common)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p6 <- ggplot(cod, aes(predfle_density_sc + predcod_density_sc, common_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

(p1|p2) / (p3|p4) / (p5|p6) + plot_annotation(title = "Cod stomachs", 
                                                  theme = theme(plot.title = element_text(size = 16)))
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 2213 rows containing non-finite values (stat_smooth).
#> Warning: Removed 2213 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 1848 rows containing non-finite values (stat_smooth).
#> Warning: Removed 1848 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 2213 rows containing non-finite values (stat_smooth).
#> Warning: Removed 2213 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 1848 rows containing non-finite values (stat_smooth).
#> Warning: Removed 1848 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 2213 rows containing non-finite values (stat_smooth).
#> Warning: Removed 2213 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 1848 rows containing non-finite values (stat_smooth).
#> Warning: Removed 1848 rows containing missing values (geom_point).


# Saduria
p7 <- ggplot(cod, aes(predcod_density_sc, prop_saduria)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p8 <- ggplot(cod, aes(predcod_density_sc, saduria_entomon_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p9 <- ggplot(cod, aes(predfle_density_sc, prop_saduria)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p10 <- ggplot(cod, aes(predfle_density_sc, saduria_entomon_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p11 <- ggplot(cod, aes(predfle_density_sc + predcod_density_sc, prop_saduria)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p12 <- ggplot(cod, aes(predfle_density_sc + predcod_density_sc, saduria_entomon_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

(p7|p8) / (p9|p10) / (p11|p12) + plot_annotation(title = "Cod stomachs", 
                                                  theme = theme(plot.title = element_text(size = 16)))
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 2213 rows containing non-finite values (stat_smooth).
#> Warning: Removed 2213 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 1848 rows containing non-finite values (stat_smooth).
#> Warning: Removed 1848 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 2213 rows containing non-finite values (stat_smooth).
#> Warning: Removed 2213 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 1848 rows containing non-finite values (stat_smooth).
#> Warning: Removed 1848 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 2213 rows containing non-finite values (stat_smooth).
#> Warning: Removed 2213 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 1848 rows containing non-finite values (stat_smooth).
#> Warning: Removed 1848 rows containing missing values (geom_point).


# Total stomach content (FR)
p13 <- ggplot(cod, aes(predcod_density_sc, FR_tot)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p14 <- ggplot(cod, aes(predfle_density_sc, FR_tot)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p15 <- ggplot(cod, aes(predfle_density_sc + predcod_density, FR_tot)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

(p13/p14/p15) + plot_annotation(title = "Cod stomachs", theme = theme(plot.title = element_text(size = 16)))
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 1848 rows containing non-finite values (stat_smooth).

#> Warning: Removed 1848 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 1848 rows containing non-finite values (stat_smooth).

#> Warning: Removed 1848 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 1848 rows containing non-finite values (stat_smooth).

#> Warning: Removed 1848 rows containing missing values (geom_point).


# Closer inspection of some of the plots...
ggplot(cod, aes(predfle_density_sc, prop_saduria)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth(method = "lm") + 
  ggtitle("prop saduria vs fle density")
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 2213 rows containing non-finite values (stat_smooth).
#> Warning: Removed 2213 rows containing missing values (geom_point).


ggplot(cod, aes(predfle_density, tot_prey_biom/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth() + 
  ggtitle("g/g tot. stomach vs fle density")
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 1848 rows containing non-finite values (stat_smooth).
#> Warning: Removed 1848 rows containing missing values (geom_point).


ggplot(cod, aes(predfle_density + predcod_density, tot_prey_biom/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth() + 
  ggtitle("g/g tot. stomach vs fle  + coddensity")
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 1848 rows containing non-finite values (stat_smooth).

#> Warning: Removed 1848 rows containing missing values (geom_point).


## Plot flounder stomach against density

# Common prey
p1 <- ggplot(fle, aes(predcod_density_sc, prop_common)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p2 <- ggplot(fle, aes(predcod_density_sc, common_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p3 <- ggplot(fle, aes(predfle_density_sc, prop_common)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p4 <- ggplot(fle, aes(predfle_density_sc, common_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p5 <- ggplot(fle, aes(predfle_density_sc + predcod_density_sc, prop_common)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p6 <- ggplot(fle, aes(predfle_density_sc + predcod_density_sc, common_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

(p1|p2) / (p3|p4) / (p5|p6) + plot_annotation(title = "Flounder stomachs", 
                                                  theme = theme(plot.title = element_text(size = 16)))
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 453 rows containing non-finite values (stat_smooth).
#> Warning: Removed 453 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 58 rows containing non-finite values (stat_smooth).
#> Warning: Removed 58 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 453 rows containing non-finite values (stat_smooth).
#> Warning: Removed 453 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 58 rows containing non-finite values (stat_smooth).
#> Warning: Removed 58 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 453 rows containing non-finite values (stat_smooth).
#> Warning: Removed 453 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 58 rows containing non-finite values (stat_smooth).
#> Warning: Removed 58 rows containing missing values (geom_point).


# Saduria
p7 <- ggplot(fle, aes(predcod_density_sc, prop_saduria)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p8 <- ggplot(fle, aes(predcod_density_sc, saduria_entomon_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p9 <- ggplot(fle, aes(predfle_density_sc, prop_saduria)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p10 <- ggplot(fle, aes(predfle_density_sc, saduria_entomon_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p11 <- ggplot(fle, aes(predfle_density_sc + predcod_density_sc, prop_saduria)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p12 <- ggplot(fle, aes(predfle_density_sc + predcod_density_sc, saduria_entomon_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

(p7|p8) / (p9|p10) / (p11|p12) + plot_annotation(title = "Flounder stomachs", 
                                                  theme = theme(plot.title = element_text(size = 16)))
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 453 rows containing non-finite values (stat_smooth).
#> Warning: Removed 453 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 58 rows containing non-finite values (stat_smooth).
#> Warning: Removed 58 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 453 rows containing non-finite values (stat_smooth).
#> Warning: Removed 453 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 58 rows containing non-finite values (stat_smooth).
#> Warning: Removed 58 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 453 rows containing non-finite values (stat_smooth).
#> Warning: Removed 453 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 58 rows containing non-finite values (stat_smooth).
#> Warning: Removed 58 rows containing missing values (geom_point).


# FR
p13 <- ggplot(fle, aes(predcod_density_sc, FR_tot)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p14 <- ggplot(fle, aes(predfle_density_sc, FR_tot)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

p15 <- ggplot(fle, aes(predfle_density_sc + predcod_density, FR_tot)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth()

(p13/p14/p15) + plot_annotation(title = "Flounder stomachs", theme = theme(plot.title = element_text(size = 16)))
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 58 rows containing non-finite values (stat_smooth).

#> Warning: Removed 58 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 58 rows containing non-finite values (stat_smooth).

#> Warning: Removed 58 rows containing missing values (geom_point).
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 58 rows containing non-finite values (stat_smooth).

#> Warning: Removed 58 rows containing missing values (geom_point).

More in depth plots…

# Does it seem like proportions of the diet changes with density, but not the total amount of food?
# Check saduria and flounder for instance.
pp7 <- ggplot(fle, aes(predcod_density_sc, prop_saduria)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth(method = "lm")

pp8 <- ggplot(filter(fle, (saduria_entomon_tot/pred_weight_g) < 0.1),
             aes(predcod_density_sc, saduria_entomon_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth(method = "lm")
#> filter (grouped): no rows removed

pp13 <- ggplot(filter(fle, (saduria_entomon_tot/pred_weight_g) < 0.1),
              aes(predcod_density_sc, FR_tot)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth(method = "lm")
#> filter (grouped): no rows removed

ppfle <- pp7/pp8/pp13 +
  plot_layout(ncol = 3) + 
  plot_annotation(title = "Flounder stomachs", theme = theme(plot.title = element_text(size = 16)))

# And that cod density is unaffected (or less at least)
p7 <- ggplot(filter(cod, (saduria_entomon_tot/pred_weight_g) < 0.02 & predfle_density_sc < 2),
             aes(predfle_density_sc, prop_saduria)) + # Crop it up a little!
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth(method = "lm")
#> filter (grouped): removed 2,043 rows (24%), 6,577 rows remaining

p8 <- ggplot(filter(cod, (saduria_entomon_tot/pred_weight_g) < 0.02 & predfle_density_sc < 2),
             aes(predfle_density_sc, saduria_entomon_tot/pred_weight_g)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth(method = "lm")
#> filter (grouped): removed 2,043 rows (24%), 6,577 rows remaining

p13 <- ggplot(filter(cod, (saduria_entomon_tot/pred_weight_g) < 0.02 & predfle_density_sc < 2 & FR_tot < 0.5),
              aes(predfle_density_sc, FR_tot)) + 
  geom_point(size = 2, shape = 21, fill = "black", color = "white", alpha = 0.8) + 
  stat_smooth(method = "lm")
#> filter (grouped): removed 2,043 rows (24%), 6,577 rows remaining

ppcod <- p7/p8/p13 +
  plot_layout(ncol = 3) +
  plot_annotation(title = "Cod stomachs", theme = theme(plot.title = element_text(size = 16)))

ppfle / ppcod
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 453 rows containing non-finite values (stat_smooth).
#> Warning: Removed 453 rows containing missing values (geom_point).
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 58 rows containing non-finite values (stat_smooth).
#> Warning: Removed 58 rows containing missing values (geom_point).
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 58 rows containing non-finite values (stat_smooth).

#> Warning: Removed 58 rows containing missing values (geom_point).
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 364 rows containing non-finite values (stat_smooth).
#> Warning: Removed 364 rows containing missing values (geom_point).
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'


# What does it mean? Yes, the proportion *may* be negatively affected by densities, 
# but the food content is not, which I guess would matter for condition

Extra: try and reproduce Haases figure

# Now, group by survey and Quarter, select only the main groups (as Haase) and see if it resembles the
# previous results.
## Reproduce Haase figures with biomass proportions

# Try to reproduce Haase et al!
# Aggregate across predator individuals and convert to long data for plotting
cod2 <- cod %>%
  drop_na(pred_cm_class) %>% # This is because not all predators have length
  filter(cruise %in% c("BITS", "BITS-1", "BITS-2")) %>%
  filter(year %in% c(2015, 2016, 2017)) %>%
  filter(source == "Kevin's MSC") %>% 
  filter(!pred_cm_class == "(0,6]") %>% 
  group_by(quarter, pred_cm_class) %>% 
  summarise(sprattus_sprattus_tot_all = sum(sprattus_sprattus_tot),
            saduria_entomon_tot_all = sum(saduria_entomon_tot),
            mysidae_tot_all = sum(mysidae_tot),
            clupea_harengus_tot_all = sum(clupea_harengus_tot),
            sprattus_sprattus_tot_all = sum(sprattus_sprattus_tot),
            gobiidae_tot_all = sum(gobiidae_tot),
            gadiformes_tot_all = sum(gadiformes_tot),
            other_pisces_tot_all = sum(other_pisces_tot),
            polychaeta_tot_all = sum(polychaeta_tot)) %>% 
  ungroup() %>% 
  pivot_longer(3:10) %>% 
  group_by(quarter, pred_cm_class) %>% 
  mutate(percent = value/sum(value)) %>%
  ungroup()
#> drop_na (grouped): no rows removed
#> filter (grouped): removed 3,317 rows (38%), 5,303 rows remaining
#> filter (grouped): removed 3,191 rows (60%), 2,112 rows remaining
#> filter (grouped): removed 283 rows (13%), 1,829 rows remaining
#> filter (grouped): removed 16 rows (1%), 1,813 rows remaining
#> group_by: 2 grouping variables (quarter, pred_cm_class)
#> summarise: now 10 rows and 10 columns, one group variable remaining (quarter)
#> ungroup: no grouping variables
#> pivot_longer: reorganized (sprattus_sprattus_tot_all, saduria_entomon_tot_all, mysidae_tot_all, clupea_harengus_tot_all, gobiidae_tot_all, …) into (name, value) [was 10x10, now 80x4]
#> group_by: 2 grouping variables (quarter, pred_cm_class)
#> mutate (grouped): new variable 'percent' (double) with 64 unique values and 0% NA
#> ungroup: no grouping variables
  
# So much herring for small cod?
# A 20 cm cod weighs approx 100 g. Can it have 20g of herring in the stomach?
#small_cod <-
d %>%
  filter(Pred.size.mm < 200 & Prey.sp.code %in% c("clupea harengus", "Clupea harengus")) %>% 
  dplyr::select(Prey.weight, Prey.sp.code, Unique.pred.id, Pred.size.mm)
#> filter: removed 25,589 rows (>99%), 46 rows remaining
#> # A tibble: 46 × 4
#>    Prey.weight Prey.sp.code    Unique.pred.id                       Pred.size.mm
#>          <dbl> <chr>           <chr>                                       <dbl>
#>  1       20.0  Clupea harengus 2016_1_73_735_COD                             180
#>  2       18.4  Clupea harengus 1977_1_16_1977_1_30_16_UNKN_41G8_39…            0
#>  3        8.02 Clupea harengus 1977_1_16_1977_1_30_16_UNKN_41G8_67…            0
#>  4        0.44 Clupea harengus 1977_1_16_1977_1_30_16_UNKN_41G8_68…            0
#>  5        7.25 Clupea harengus 1977_1_9_1977_3_15_9_UNKN_41G8_25_N…            0
#>  6       26.2  Clupea harengus 1977_1_9_1977_3_15_9_UNKN_41G8_26_N…            0
#>  7        6.17 Clupea harengus 1977_1_9_1977_3_15_9_UNKN_41G8_30_N…            0
#>  8       18.9  Clupea harengus 1977_1_9_1977_3_15_9_UNKN_41G8_31_N…            0
#>  9       16.4  Clupea harengus 1977_1_9_1977_3_15_9_UNKN_41G8_38_N…            0
#> 10       44.2  Clupea harengus 1977_1_9_1977_3_15_9_UNKN_41G8_41_N…            0
#> # … with 36 more rows

d_wide4 %>% filter(unique_pred_id == "2016_1_73_735_COD") %>% as.data.frame()
#> filter: removed 11,188 rows (>99%), one row remaining
#>      unique_pred_id amphipoda_tot bivalvia_tot clupeidae_tot
#> 1 2016_1_73_735_COD             0            0             0
#>   clupea_harengus_tot gadus_morhua_tot gadiformes_tot gobiidae_tot mysidae_tot
#> 1               19.99                0              0            0           0
#>   non_bio_tot other_crustacea_tot other_tot other_pisces_tot
#> 1           0                   0         0                0
#>   platichthys_flesus_tot polychaeta_tot saduria_entomon_tot
#> 1                      0           0.05                   0
#>   sprattus_sprattus_tot year quarter cruise hn sample predator_spec        x
#> 1                     0 2016       1   BITS 73    735           COD 613.9639
#>          y    lat   long ices_rect pred_size_mm pred_weight_g      source
#> 1 6271.507 56.574 16.855      42G6          180            46 Kevin's MSC
#>   tot_prey_biom
#> 1         20.04
d %>% filter(Unique.pred.id == "2016_1_73_735_COD") %>% as.data.frame()
#> filter: removed 25,633 rows (>99%), 2 rows remaining
#>   SubDiv Year Month Day N.with.food N.regurgitated N.skeletal N.empty HN Sample
#> 1     27 2016     2  28           1             NA         NA      NA 73    735
#> 2     27 2016     2  28          NA             NA          1      NA 73    735
#>   Length.code    Prey.sp.code Prey.size Prey.weight Prey.nr Stage.digestion
#> 1          Lt Clupea harengus       150       19.99       1               1
#> 2        <NA>  Bylgides sarsi        NA        0.05      NA               2
#>   Comment Parasites.in.stomach Quarter Coun. Ship Cruise Pred.size.mm
#> 1    <NA>                 <NA>       1   SWE   NA   BITS          180
#> 2    <NA>                 <NA>       1   SWE   NA   BITS          180
#>   Pred.weight.g Predator.gutted.weight Predator.code Sex Maturity Gall.content
#> 1            46                     42           COD   M        2            4
#> 2            46                     42           COD   M        2            4
#>   Q.year Age Date        Index    Lat   Long Depth.catch Ices.rect      source
#> 1 1_2016   2 <NA> OXBH.2016.73 56.574 16.855          63      42G6 Kevin's MSC
#> 2 1_2016   2 <NA> OXBH.2016.73 56.574 16.855          63      42G6 Kevin's MSC
#>   Number Sample.type transect Depthstep Gonad.weight.roundfish
#> 1     NA          NA       NA        NA                     NA
#> 2     NA          NA       NA        NA                     NA
#>   Liver.weight.roundfish Stomach.content Perc.stomac.content comments Processed
#> 1                     NA              NA                  NA       NA        NA
#> 2                     NA              NA                  NA       NA        NA
#>      Unique.pred.id coord_from_ices_rect_centre        X        Y
#> 1 2016_1_73_735_COD                           N 613.9639 6271.507
#> 2 2016_1_73_735_COD                           N 613.9639 6271.507

# Check sample sizes per size class (numbers below bars in Kevins figures)
cod2 %>%
  drop_na(pred_cm_class) %>% # This is because not all predators have length
  filter(!pred_cm_class == "(0,6]") %>% 
  group_by(quarter, pred_cm_class) %>% 
  summarise(n = n())
#> drop_na: no rows removed
#> filter: no rows removed
#> group_by: 2 grouping variables (quarter, pred_cm_class)
#> summarise: now 10 rows and 3 columns, one group variable remaining (quarter)
#> # A tibble: 10 × 3
#> # Groups:   quarter [2]
#>    quarter pred_cm_class     n
#>      <dbl> <fct>         <int>
#>  1       1 (6,20]            8
#>  2       1 (20,30]           8
#>  3       1 (30,40]           8
#>  4       1 (40,50]           8
#>  5       1 (50,200]          8
#>  6       4 (6,20]            8
#>  7       4 (20,30]           8
#>  8       4 (30,40]           8
#>  9       4 (40,50]           8
#> 10       4 (50,200]          8
# I seem to have more data...  

# Check Kevin's raw data... 
kev <- read.csv("/Users/maxlindmark/Desktop/R_STUDIO_PROJECTS/bentfish/data/stomach_data/Master_Kevin_updated.csv", sep = ";")
head(kev)
#>   X coun. ship cruise sample.typ      sd ices rect transect       date  qyear
#> 1 1   SWE DANS   BITS      trawl SD26-28 43G8 4363          23.11.2015 4_2015
#> 2 2   SWE DANS   BITS      trawl SD26-28 43G8 4363          23.11.2015 4_2015
#> 3 3   SWE DANS   BITS      trawl SD26-28 43G8 4363          23.11.2015 4_2015
#> 4 4   SWE DANS   BITS      trawl SD26-28 43G8 4363          23.11.2015 4_2015
#> 5 5   SWE DANS   BITS      trawl SD26-28 43G8 4363          23.11.2015 4_2015
#> 6 6   SWE DANS   BITS      trawl SD26-28 43G8 4363          23.11.2015 4_2015
#>   year month day quarter station.haul        index    lat   long depth
#> 1 2015    11  23       4           19 OXBH.2015.19 57,005 18,806    72
#> 2 2015    11  23       4           19 OXBH.2015.19 57,005 18,806    72
#> 3 2015    11  23       4           19 OXBH.2015.19 57,005 18,806    72
#> 4 2015    11  23       4           19 OXBH.2015.19 57,005 18,806    72
#> 5 2015    11  23       4           19 OXBH.2015.19 57,005 18,806    72
#> 6 2015    11  23       4           19 OXBH.2015.19 57,005 18,806    72
#>   depthstep sample pred.code pred.size length lengthclass pred.weight
#> 1      >30m    406       COD       190     19      Lc6-20          70
#> 2      >30m    385       COD       320     32     Lc31-40         246
#> 3      >30m    389       COD       330     33     Lc31-40         330
#> 4      >30m    396       COD       300     30     Lc21-30         193
#> 5      >30m    381       COD       310     31     Lc31-40         244
#> 6      >30m    400       COD       250     25     Lc21-30         136
#>   pred.gutted.weight age sex maturity gonad.weight liver.weight gall.bladder
#> 1                 60   1   M        2           NA            5            1
#> 2                213   3   M        8           NA            9            1
#> 3                499   3   M        2           NA           27            4
#> 4                169   2   M        3           NA           10            4
#> 5                220   2   M        3           NA           11            4
#> 6                115   1   M        3           NA            8            2
#>   specimen.note n.full n.regurgitated n.skeletal n.empty stomachcontent
#> 1                    1             NA         NA      NA           0,87
#> 2                    1             NA         NA      NA           1,78
#> 3                   NA              1         NA      NA              0
#> 4                    1             NA         NA      NA           0,42
#> 5                    1             NA         NA      NA           0,06
#> 6                    1             NA         NA      NA            0,8
#>   perc.stomachcontent length.code    prey.sp.code prey.size prey.weight prey.nr
#> 1                1,24                 Mysis mixta        NA        0,87      19
#> 2                0,72          Ls Clupea harengus        85        1,78       1
#> 3                   0                                    NA           0        
#> 4                0,22                 Mysis mixta        NA        0,42      10
#> 5                0,02                 Mysis mixta        NA        0,06       2
#> 6                0,59                 Mysis mixta        NA         0,8      19
#>   stage.digestion      comment parasites.in.stomach processed
#> 1               1                                    original
#> 2               1 without head                       original
#> 3              NA fresh scales                           sure
#> 4               1                                    original
#> 5               1                                    original
#> 6               1                                    original

kev$prey.weight <- as.numeric(gsub(",", ".", gsub("\\.", "", kev$prey.weight)))

kev %>% 
  filter(lengthclass == "Lc6-20" & pred.code == "COD" & year %in% c(2015, 2016, 2017) & quarter == 1) %>% 
  drop_na(prey.sp.code) %>%
  drop_na(prey.weight) %>%
  group_by(prey.sp.code) %>% 
  summarise(tot_weight_by_prey = sum(prey.weight)) %>% 
  arrange(desc(tot_weight_by_prey)) %>% 
  as.data.frame()
#> filter: removed 15,869 rows (99%), 223 rows remaining
#> drop_na: no rows removed
#> drop_na: no rows removed
#> group_by: one grouping variable (prey.sp.code)
#> summarise: now 23 rows and 2 columns, ungrouped
#>              prey.sp.code tot_weight_by_prey
#> 1         Clupea harengus              19.99
#> 2             Mysis mixta               6.52
#> 3          Bylgides sarsi               4.52
#> 4         Saduria entomon               3.75
#> 5                Gobiidae               3.53
#> 6               Clupeidae               1.29
#> 7       Diastylis rathkei               1.11
#> 8         Crangon crangon               0.58
#> 9              Priapulida               0.40
#> 10           Gammarus sp.               0.28
#> 11                Mysidae               0.22
#> 12 Halicryptus spinulosus               0.17
#> 13                Cumacea               0.15
#> 14      Limecola balthica               0.11
#> 15               Bivalvia               0.10
#> 16   Pontoporeia femorata               0.07
#> 17                Remains               0.06
#> 18              Amphipoda               0.01
#> 19     Monoporeia affinis               0.01
#> 20            Mytilus sp.               0.01
#> 21       Neomysis integer               0.01
#> 22                                      0.00
#> 23               Copepoda               0.00

# OK so probably Kevin made some additional filter, because I also have a larger sample size